From d249e67a6a7c36a2ffcf08839810a8efa4603f18 Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Wed, 5 Nov 2025 11:14:02 +0000 Subject: [PATCH 01/39] [libc][math] Disable `FEnvSafeTest.cpp` if AArch64 target has no FP support (#166370) The `FEnvSafeTest.cpp` test fails on AArch64 soft nofp configurations because LLVM libc does not provide a floating-point environment in these configurations. This patch adds another preprocessor guard on `__ARM_FP` to disable the test on those. --- libc/test/UnitTest/FEnvSafeTest.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libc/test/UnitTest/FEnvSafeTest.cpp b/libc/test/UnitTest/FEnvSafeTest.cpp index 4393f9d5e5c3b..64f50d7be7fe3 100644 --- a/libc/test/UnitTest/FEnvSafeTest.cpp +++ b/libc/test/UnitTest/FEnvSafeTest.cpp @@ -43,7 +43,8 @@ void FEnvSafeTest::set_fenv(const fenv_t &fenv) { void FEnvSafeTest::expect_fenv_eq(const fenv_t &before_fenv, const fenv_t &after_fenv) { -#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && !defined(LIBC_COMPILER_IS_MSVC) +#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && !defined(LIBC_COMPILER_IS_MSVC) && \ + defined(__ARM_FP) using FPState = LIBC_NAMESPACE::fputil::FEnv::FPState; const FPState &before_state = reinterpret_cast(before_fenv); const FPState &after_state = reinterpret_cast(after_fenv); From fedd3b0399e78f47c5795f436b88722319832172 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 5 Nov 2025 12:15:48 +0100 Subject: [PATCH 02/39] [clang][bytecode] Remove dummy variables once they are proper globals (#166174) Dummy variables have an entry in `Program::Globals`, but they are not added to `GlobalIndices`. When registering redeclarations, we used to only patch up the global indices, but that left the dummy variables alone. Update the dummy variables of all redeclarations as well. Fixes https://github.com/llvm/llvm-project/issues/165952 --- clang/lib/AST/ByteCode/Program.cpp | 29 +++++++++++++++++++++++++---- clang/lib/AST/ByteCode/Program.h | 1 - clang/test/AST/ByteCode/records.cpp | 11 +++++++++++ 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp index e0b2852f0e906..2425373ab2ef8 100644 --- a/clang/lib/AST/ByteCode/Program.cpp +++ b/clang/lib/AST/ByteCode/Program.cpp @@ -218,21 +218,42 @@ UnsignedOrNone Program::createGlobal(const ValueDecl *VD, const Expr *Init) { return std::nullopt; Global *NewGlobal = Globals[*Idx]; + // Note that this loop has one iteration where Redecl == VD. for (const Decl *Redecl : VD->redecls()) { - unsigned &PIdx = GlobalIndices[Redecl]; + + // If this redecl was registered as a dummy variable, it is now a proper + // global variable and points to the block we just created. + if (auto DummyIt = DummyVariables.find(Redecl); + DummyIt != DummyVariables.end()) { + assert(!Globals[DummyIt->second]->block()->hasPointers()); + Globals[DummyIt->second] = NewGlobal; + DummyVariables.erase(DummyIt); + } + // If the redeclaration hasn't been registered yet at all, we just set its + // global index to Idx. If it has been registered yet, it might have + // pointers pointing to it and we need to transfer those pointers to the new + // block. + auto [Iter, Inserted] = GlobalIndices.try_emplace(Redecl); + if (Inserted) { + GlobalIndices[Redecl] = *Idx; + continue; + } + if (Redecl != VD) { - if (Block *RedeclBlock = Globals[PIdx]->block(); + if (Block *RedeclBlock = Globals[Iter->second]->block(); RedeclBlock->isExtern()) { - Globals[PIdx] = NewGlobal; + // All pointers pointing to the previous extern decl now point to the // new decl. // A previous iteration might've already fixed up the pointers for this // global. if (RedeclBlock != NewGlobal->block()) RedeclBlock->movePointersTo(NewGlobal->block()); + + Globals[Iter->second] = NewGlobal; } } - PIdx = *Idx; + Iter->second = *Idx; } return *Idx; diff --git a/clang/lib/AST/ByteCode/Program.h b/clang/lib/AST/ByteCode/Program.h index 28fcc97f5339d..cc9127dc77860 100644 --- a/clang/lib/AST/ByteCode/Program.h +++ b/clang/lib/AST/ByteCode/Program.h @@ -205,7 +205,6 @@ class Program final { const Block *block() const { return &B; } private: - /// Required metadata - does not actually track pointers. Block B; }; diff --git a/clang/test/AST/ByteCode/records.cpp b/clang/test/AST/ByteCode/records.cpp index 83f32c97c50c7..4799ebe25dde1 100644 --- a/clang/test/AST/ByteCode/records.cpp +++ b/clang/test/AST/ByteCode/records.cpp @@ -1882,3 +1882,14 @@ namespace MethodWillHaveBody { } int n = f(0); // both-note {{instantiation of}} } + +namespace StaticRedecl { + struct T { + static T tt; + constexpr T() : p(&tt) {} + T *p; + }; + T T::tt; + constexpr T t; + static_assert(t.p == &T::tt, ""); +} From e8564830c19e6fd5bfa38488c06f332b214ea858 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Wed, 5 Nov 2025 19:25:53 +0800 Subject: [PATCH 03/39] [clang-tidy][doc] add more information in twine-local's document (#166266) explain more about use-after-free in llvm-twine-local add note about manually adjusting code after applying fix-it. fixed: #154810 --- .../clang-tidy/checks/llvm/twine-local.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst b/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst index ec9ef1c60913c..6c994a48d83de 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/llvm/twine-local.rst @@ -14,3 +14,21 @@ should be generally avoided. // becomes static std::string Moo = (Twine("bark") + "bah").str(); + +The ``Twine`` does not own the memory of its contents, so it is not +recommended to use ``Twine`` created from temporary strings or string literals. + +.. code-block:: c++ + + static Twine getModuleIdentifier(StringRef moduleName) { + return moduleName + "_module"; + } + void foo() { + Twine result = getModuleIdentifier(std::string{"abc"} + "def"); + // temporary std::string is destroyed here, result is dangling + } + +After applying this fix-it hints, the code will use ``std::string`` instead of +``Twine`` for local variables. However, ``Twine`` has lots of methods that +are incompatible with ``std::string``, so the user may need to adjust the code +manually after applying the fix-it hints. From c1dc064ba063f0d679f1a6d6aeef99becea8b709 Mon Sep 17 00:00:00 2001 From: Sirui Mu Date: Wed, 5 Nov 2025 19:32:34 +0800 Subject: [PATCH 04/39] [CIR] Add support for storing into _Atomic variables (#165872) --- clang/include/clang/CIR/MissingFeatures.h | 2 + clang/lib/CIR/CodeGen/CIRGenAtomic.cpp | 121 +++++++++++++++++++++- clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 14 ++- clang/lib/CIR/CodeGen/CIRGenFunction.h | 7 +- clang/test/CIR/CodeGen/atomic.c | 26 +++++ 5 files changed, 162 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 48ef8be9fb782..6f099a7027a10 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -180,6 +180,8 @@ struct MissingFeatures { static bool atomicSyncScopeID() { return false; } static bool atomicTypes() { return false; } static bool atomicUseLibCall() { return false; } + static bool atomicMicrosoftVolatile() { return false; } + static bool atomicOpenMP() { return false; } // Global ctor handling static bool globalCtorLexOrder() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 7db6e283ec0a5..cd4c1f0e5b769 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -27,6 +27,7 @@ class AtomicInfo { CharUnits atomicAlign; CharUnits valueAlign; TypeEvaluationKind evaluationKind = cir::TEK_Scalar; + bool useLibCall = true; LValue lvalue; mlir::Location loc; @@ -62,8 +63,8 @@ class AtomicInfo { assert(!cir::MissingFeatures::atomicInfo()); cgf.cgm.errorNYI(loc, "AtomicInfo: non-simple lvalue"); } - - assert(!cir::MissingFeatures::atomicUseLibCall()); + useLibCall = !ctx.getTargetInfo().hasBuiltinAtomic( + atomicSizeInBits, ctx.toBits(lvalue.getAlignment())); } QualType getValueType() const { return valueTy; } @@ -75,6 +76,8 @@ class AtomicInfo { assert(!cir::MissingFeatures::atomicInfoGetAtomicPointer()); return nullptr; } + bool shouldUseLibCall() const { return useLibCall; } + const LValue &getAtomicLValue() const { return lvalue; } Address getAtomicAddress() const { mlir::Type elemTy; if (lvalue.isSimple()) { @@ -96,6 +99,8 @@ class AtomicInfo { bool emitMemSetZeroIfNecessary() const; + mlir::Value getScalarRValValueOrNull(RValue rvalue) const; + /// Cast the given pointer to an integer pointer suitable for atomic /// operations on the source. Address castToAtomicIntPointer(Address addr) const; @@ -105,6 +110,9 @@ class AtomicInfo { /// copy the value across. Address convertToAtomicIntPointer(Address addr) const; + /// Converts a rvalue to integer value. + mlir::Value convertRValueToInt(RValue rvalue, bool cmpxchg = false) const; + /// Copy an atomic r-value into atomic-layout memory. void emitCopyIntoMemory(RValue rvalue) const; @@ -195,6 +203,12 @@ Address AtomicInfo::createTempAlloca() const { return tempAlloca; } +mlir::Value AtomicInfo::getScalarRValValueOrNull(RValue rvalue) const { + if (rvalue.isScalar() && (!hasPadding() || !lvalue.isSimple())) + return rvalue.getValue(); + return nullptr; +} + Address AtomicInfo::castToAtomicIntPointer(Address addr) const { auto intTy = mlir::dyn_cast(addr.getElementType()); // Don't bother with int casts if the integer size is the same. @@ -211,10 +225,38 @@ bool AtomicInfo::emitMemSetZeroIfNecessary() const { return false; cgf.cgm.errorNYI(loc, - "AtomicInfo::emitMemSetZeroIfNecessary: emit memset zero"); + "AtomicInfo::emitMemSetZeroIfNecaessary: emit memset zero"); return false; } +/// Return true if \param valueTy is a type that should be casted to integer +/// around the atomic memory operation. If \param cmpxchg is true, then the +/// cast of a floating point type is made as that instruction can not have +/// floating point operands. TODO: Allow compare-and-exchange and FP - see +/// comment in CIRGenAtomicExpandPass.cpp. +static bool shouldCastToInt(mlir::Type valueTy, bool cmpxchg) { + if (cir::isAnyFloatingPointType(valueTy)) + return isa(valueTy) || cmpxchg; + return !isa(valueTy) && !isa(valueTy); +} + +mlir::Value AtomicInfo::convertRValueToInt(RValue rvalue, bool cmpxchg) const { + // If we've got a scalar value of the right size, try to avoid going + // through memory. Floats get casted if needed by AtomicExpandPass. + if (mlir::Value value = getScalarRValValueOrNull(rvalue)) { + if (!shouldCastToInt(value.getType(), cmpxchg)) + return cgf.emitToMemory(value, valueTy); + + cgf.cgm.errorNYI( + loc, "AtomicInfo::convertRValueToInt: cast scalar rvalue to int"); + return nullptr; + } + + cgf.cgm.errorNYI( + loc, "AtomicInfo::convertRValueToInt: cast non-scalar rvalue to int"); + return nullptr; +} + /// Copy an r-value into memory as part of storing to an atomic type. /// This needs to create a bit-pattern suitable for atomic operations. void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const { @@ -815,6 +857,79 @@ RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) { e->getExprLoc()); } +void CIRGenFunction::emitAtomicStore(RValue rvalue, LValue dest, bool isInit) { + bool isVolatile = dest.isVolatileQualified(); + auto order = cir::MemOrder::SequentiallyConsistent; + if (!dest.getType()->isAtomicType()) { + assert(!cir::MissingFeatures::atomicMicrosoftVolatile()); + } + return emitAtomicStore(rvalue, dest, order, isVolatile, isInit); +} + +/// Emit a store to an l-value of atomic type. +/// +/// Note that the r-value is expected to be an r-value of the atomic type; this +/// means that for aggregate r-values, it should include storage for any padding +/// that was necessary. +void CIRGenFunction::emitAtomicStore(RValue rvalue, LValue dest, + cir::MemOrder order, bool isVolatile, + bool isInit) { + // If this is an aggregate r-value, it should agree in type except + // maybe for address-space qualification. + mlir::Location loc = dest.getPointer().getLoc(); + assert(!rvalue.isAggregate() || + rvalue.getAggregateAddress().getElementType() == + dest.getAddress().getElementType()); + + AtomicInfo atomics(*this, dest, loc); + LValue lvalue = atomics.getAtomicLValue(); + + if (lvalue.isSimple()) { + // If this is an initialization, just put the value there normally. + if (isInit) { + atomics.emitCopyIntoMemory(rvalue); + return; + } + + // Check whether we should use a library call. + if (atomics.shouldUseLibCall()) { + assert(!cir::MissingFeatures::atomicUseLibCall()); + cgm.errorNYI(loc, "emitAtomicStore: atomic store with library call"); + return; + } + + // Okay, we're doing this natively. + mlir::Value valueToStore = atomics.convertRValueToInt(rvalue); + + // Do the atomic store. + Address addr = atomics.getAtomicAddress(); + if (mlir::Value value = atomics.getScalarRValValueOrNull(rvalue)) { + if (shouldCastToInt(value.getType(), /*CmpXchg=*/false)) { + addr = atomics.castToAtomicIntPointer(addr); + valueToStore = + builder.createIntCast(valueToStore, addr.getElementType()); + } + } + cir::StoreOp store = builder.createStore(loc, valueToStore, addr); + + // Initializations don't need to be atomic. + if (!isInit) { + assert(!cir::MissingFeatures::atomicOpenMP()); + store.setMemOrder(order); + } + + // Other decoration. + if (isVolatile) + store.setIsVolatile(true); + + assert(!cir::MissingFeatures::opLoadStoreTbaa()); + return; + } + + cgm.errorNYI(loc, "emitAtomicStore: non-simple atomic lvalue"); + assert(!cir::MissingFeatures::opLoadStoreAtomic()); +} + void CIRGenFunction::emitAtomicInit(Expr *init, LValue dest) { AtomicInfo atomics(*this, dest, getLoc(init->getSourceRange())); diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 4fb178df0e508..422fa1cf5ad2e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -311,7 +311,8 @@ static LValue emitGlobalVarDeclLValue(CIRGenFunction &cgf, const Expr *e, void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, bool isVolatile, QualType ty, - bool isInit, bool isNontemporal) { + LValueBaseInfo baseInfo, bool isInit, + bool isNontemporal) { assert(!cir::MissingFeatures::opLoadStoreThreadLocal()); if (const auto *clangVecTy = ty->getAs()) { @@ -333,7 +334,13 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, value = emitToMemory(value, ty); - assert(!cir::MissingFeatures::opLoadStoreAtomic()); + assert(!cir::MissingFeatures::opLoadStoreTbaa()); + LValue atomicLValue = LValue::makeAddr(addr, ty, baseInfo); + if (ty->isAtomicType() || + (!isInit && isLValueSuitableForInlineAtomic(atomicLValue))) { + emitAtomicStore(RValue::get(value), atomicLValue, isInit); + return; + } // Update the alloca with more info on initialization. assert(addr.getPointer() && "expected pointer to exist"); @@ -550,7 +557,8 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue, } emitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), - lvalue.getType(), isInit, /*isNontemporal=*/false); + lvalue.getType(), lvalue.getBaseInfo(), isInit, + /*isNontemporal=*/false); } mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index dece642eb13b6..1c52a78d72e33 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1271,6 +1271,9 @@ class CIRGenFunction : public CIRGenTypeCache { RValue emitAtomicExpr(AtomicExpr *e); void emitAtomicInit(Expr *init, LValue dest); + void emitAtomicStore(RValue rvalue, LValue dest, bool isInit); + void emitAtomicStore(RValue rvalue, LValue dest, cir::MemOrder order, + bool isVolatile, bool isInit); AutoVarEmission emitAutoVarAlloca(const clang::VarDecl &d, mlir::OpBuilder::InsertPoint ip = {}); @@ -1680,8 +1683,8 @@ class CIRGenFunction : public CIRGenTypeCache { bool isInit); void emitStoreOfScalar(mlir::Value value, Address addr, bool isVolatile, - clang::QualType ty, bool isInit = false, - bool isNontemporal = false); + clang::QualType ty, LValueBaseInfo baseInfo, + bool isInit = false, bool isNontemporal = false); void emitStoreOfScalar(mlir::Value value, LValue lvalue, bool isInit); /// Store the specified rvalue into the specified diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c index 65799881a0cbe..d5bea8446d730 100644 --- a/clang/test/CIR/CodeGen/atomic.c +++ b/clang/test/CIR/CodeGen/atomic.c @@ -46,6 +46,32 @@ void f2(void) { // OGCG-NEXT: store i32 42, ptr %[[SLOT]], align 4 // OGCG: } +void f3(_Atomic(int) *p) { + *p = 42; +} + +// CIR-LABEL: @f3 +// CIR: cir.store align(4) atomic(seq_cst) %{{.+}}, %{{.+}} : !s32i, !cir.ptr + +// LLVM-LABEL: @f3 +// LLVM: store atomic i32 42, ptr %{{.+}} seq_cst, align 4 + +// OGCG-LABEL: @f3 +// OGCG: store atomic i32 42, ptr %{{.+}} seq_cst, align 4 + +void f4(_Atomic(float) *p) { + *p = 3.14; +} + +// CIR-LABEL: @f4 +// CIR: cir.store align(4) atomic(seq_cst) %{{.+}}, %{{.+}} : !cir.float, !cir.ptr + +// LLVM-LABEL: @f4 +// LLVM: store atomic float 0x40091EB860000000, ptr %{{.+}} seq_cst, align 4 + +// OGCG-LABEL: @f4 +// OGCG: store atomic float 0x40091EB860000000, ptr %{{.+}} seq_cst, align 4 + void load(int *ptr) { int x; __atomic_load(ptr, &x, __ATOMIC_RELAXED); From a38e0942407ef3395264831ef971838b69d2a652 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 5 Nov 2025 21:04:32 +0900 Subject: [PATCH 05/39] [mlir] Dialect Conversion: Add support for post-order legalization order (#166292) By default, the dialect conversion driver processes operations in pre-order: the initial worklist is populated pre-order. (New/modified operations are immediately legalized recursively.) This commit adds a new API for selective post-order legalization. Patterns can request an operation / region legalization via `ConversionPatternRewriter::legalize`. They can call these helper functions on nested regions before rewriting the operation itself. Note: In rollback mode, a failed recursive legalization typically leads to a conversion failure. Since recursive legalization is performed by separate pattern applications, there is no way for the original pattern to recover from such a failure. --- .../mlir/Transforms/DialectConversion.h | 25 +++- .../Transforms/Utils/DialectConversion.cpp | 120 +++++++++++++----- mlir/test/Transforms/test-legalizer-full.mlir | 18 +++ .../Transforms/test-legalizer-rollback.mlir | 19 +++ mlir/test/Transforms/test-legalizer.mlir | 32 +++++ mlir/test/lib/Dialect/Test/TestPatterns.cpp | 22 +++- 6 files changed, 199 insertions(+), 37 deletions(-) diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index ed7e2a08ebfd9..5ac9e26e8636d 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -981,6 +981,28 @@ class ConversionPatternRewriter final : public PatternRewriter { /// Return a reference to the internal implementation. detail::ConversionPatternRewriterImpl &getImpl(); + /// Attempt to legalize the given operation. This can be used within + /// conversion patterns to change the default pre-order legalization order. + /// Returns "success" if the operation was legalized, "failure" otherwise. + /// + /// Note: In a partial conversion, this function returns "success" even if + /// the operation could not be legalized, as long as it was not explicitly + /// marked as illegal in the conversion target. + LogicalResult legalize(Operation *op); + + /// Attempt to legalize the given region. This can be used within + /// conversion patterns to change the default pre-order legalization order. + /// Returns "success" if the region was legalized, "failure" otherwise. + /// + /// If the current pattern runs with a type converter, the entry block + /// signature will be converted before legalizing the operations in the + /// region. + /// + /// Note: In a partial conversion, this function returns "success" even if + /// an operation could not be legalized, as long as it was not explicitly + /// marked as illegal in the conversion target. + LogicalResult legalize(Region *r); + private: // Allow OperationConverter to construct new rewriters. friend struct OperationConverter; @@ -989,7 +1011,8 @@ class ConversionPatternRewriter final : public PatternRewriter { /// conversions. They apply some IR rewrites in a delayed fashion and could /// bring the IR into an inconsistent state when used standalone. explicit ConversionPatternRewriter(MLIRContext *ctx, - const ConversionConfig &config); + const ConversionConfig &config, + OperationConverter &converter); // Hide unsupported pattern rewriter API. using OpBuilder::setListener; diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 2fe06970eb568..f8c38fadbd229 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -92,6 +92,22 @@ static OpBuilder::InsertPoint computeInsertPoint(ArrayRef vals) { return pt; } +namespace { +enum OpConversionMode { + /// In this mode, the conversion will ignore failed conversions to allow + /// illegal operations to co-exist in the IR. + Partial, + + /// In this mode, all operations must be legal for the given target for the + /// conversion to succeed. + Full, + + /// In this mode, operations are analyzed for legality. No actual rewrites are + /// applied to the operations on success. + Analysis, +}; +} // namespace + //===----------------------------------------------------------------------===// // ConversionValueMapping //===----------------------------------------------------------------------===// @@ -866,8 +882,9 @@ namespace mlir { namespace detail { struct ConversionPatternRewriterImpl : public RewriterBase::Listener { explicit ConversionPatternRewriterImpl(ConversionPatternRewriter &rewriter, - const ConversionConfig &config) - : rewriter(rewriter), config(config), + const ConversionConfig &config, + OperationConverter &opConverter) + : rewriter(rewriter), config(config), opConverter(opConverter), notifyingRewriter(rewriter.getContext(), config.listener) {} //===--------------------------------------------------------------------===// @@ -1124,6 +1141,9 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { /// Dialect conversion configuration. const ConversionConfig &config; + /// The operation converter to use for recursive legalization. + OperationConverter &opConverter; + /// A set of erased operations. This set is utilized only if /// `allowPatternRollback` is set to "false". Conceptually, this set is /// similar to `replacedOps` (which is maintained when the flag is set to @@ -2084,9 +2104,10 @@ void ConversionPatternRewriterImpl::notifyMatchFailure( //===----------------------------------------------------------------------===// ConversionPatternRewriter::ConversionPatternRewriter( - MLIRContext *ctx, const ConversionConfig &config) - : PatternRewriter(ctx), - impl(new detail::ConversionPatternRewriterImpl(*this, config)) { + MLIRContext *ctx, const ConversionConfig &config, + OperationConverter &opConverter) + : PatternRewriter(ctx), impl(new detail::ConversionPatternRewriterImpl( + *this, config, opConverter)) { setListener(impl.get()); } @@ -2207,6 +2228,37 @@ ConversionPatternRewriter::getRemappedValues(ValueRange keys, return success(); } +LogicalResult ConversionPatternRewriter::legalize(Region *r) { + // Fast path: If the region is empty, there is nothing to legalize. + if (r->empty()) + return success(); + + // Gather a list of all operations to legalize. This is done before + // converting the entry block signature because unrealized_conversion_cast + // ops should not be included. + SmallVector ops; + for (Block &b : *r) + for (Operation &op : b) + ops.push_back(&op); + + // If the current pattern runs with a type converter, convert the entry block + // signature. + if (const TypeConverter *converter = impl->currentTypeConverter) { + std::optional conversion = + converter->convertBlockSignature(&r->front()); + if (!conversion) + return failure(); + applySignatureConversion(&r->front(), *conversion, converter); + } + + // Legalize all operations in the region. + for (Operation *op : ops) + if (failed(legalize(op))) + return failure(); + + return success(); +} + void ConversionPatternRewriter::inlineBlockBefore(Block *source, Block *dest, Block::iterator before, ValueRange argValues) { @@ -3192,22 +3244,6 @@ static void reconcileUnrealizedCasts( // OperationConverter //===----------------------------------------------------------------------===// -namespace { -enum OpConversionMode { - /// In this mode, the conversion will ignore failed conversions to allow - /// illegal operations to co-exist in the IR. - Partial, - - /// In this mode, all operations must be legal for the given target for the - /// conversion to succeed. - Full, - - /// In this mode, operations are analyzed for legality. No actual rewrites are - /// applied to the operations on success. - Analysis, -}; -} // namespace - namespace mlir { // This class converts operations to a given conversion target via a set of // rewrite patterns. The conversion behaves differently depending on the @@ -3217,16 +3253,20 @@ struct OperationConverter { const FrozenRewritePatternSet &patterns, const ConversionConfig &config, OpConversionMode mode) - : rewriter(ctx, config), opLegalizer(rewriter, target, patterns), + : rewriter(ctx, config, *this), opLegalizer(rewriter, target, patterns), mode(mode) {} /// Converts the given operations to the conversion target. LogicalResult convertOperations(ArrayRef ops); -private: - /// Converts an operation with the given rewriter. - LogicalResult convert(Operation *op); + /// Converts a single operation. If `isRecursiveLegalization` is "true", the + /// conversion is a recursive legalization request, triggered from within a + /// pattern. In that case, do not emit errors because there will be another + /// attempt at legalizing the operation later (via the regular pre-order + /// legalization mechanism). + LogicalResult convert(Operation *op, bool isRecursiveLegalization = false); +private: /// The rewriter to use when converting operations. ConversionPatternRewriter rewriter; @@ -3238,32 +3278,42 @@ struct OperationConverter { }; } // namespace mlir -LogicalResult OperationConverter::convert(Operation *op) { +LogicalResult ConversionPatternRewriter::legalize(Operation *op) { + return impl->opConverter.convert(op, /*isRecursiveLegalization=*/true); +} + +LogicalResult OperationConverter::convert(Operation *op, + bool isRecursiveLegalization) { const ConversionConfig &config = rewriter.getConfig(); // Legalize the given operation. if (failed(opLegalizer.legalize(op))) { // Handle the case of a failed conversion for each of the different modes. // Full conversions expect all operations to be converted. - if (mode == OpConversionMode::Full) - return op->emitError() - << "failed to legalize operation '" << op->getName() << "'"; + if (mode == OpConversionMode::Full) { + if (!isRecursiveLegalization) + op->emitError() << "failed to legalize operation '" << op->getName() + << "'"; + return failure(); + } // Partial conversions allow conversions to fail iff the operation was not // explicitly marked as illegal. If the user provided a `unlegalizedOps` // set, non-legalizable ops are added to that set. if (mode == OpConversionMode::Partial) { - if (opLegalizer.isIllegal(op)) - return op->emitError() - << "failed to legalize operation '" << op->getName() - << "' that was explicitly marked illegal"; - if (config.unlegalizedOps) + if (opLegalizer.isIllegal(op)) { + if (!isRecursiveLegalization) + op->emitError() << "failed to legalize operation '" << op->getName() + << "' that was explicitly marked illegal"; + return failure(); + } + if (config.unlegalizedOps && !isRecursiveLegalization) config.unlegalizedOps->insert(op); } } else if (mode == OpConversionMode::Analysis) { // Analysis conversions don't fail if any operations fail to legalize, // they are only interested in the operations that were successfully // legalized. - if (config.legalizableOps) + if (config.legalizableOps && !isRecursiveLegalization) config.legalizableOps->insert(op); } return success(); diff --git a/mlir/test/Transforms/test-legalizer-full.mlir b/mlir/test/Transforms/test-legalizer-full.mlir index 42cec68b9fbbb..8da9109a32762 100644 --- a/mlir/test/Transforms/test-legalizer-full.mlir +++ b/mlir/test/Transforms/test-legalizer-full.mlir @@ -72,3 +72,21 @@ builtin.module { } } + +// ----- + +// The region of "test.post_order_legalization" is converted before the op. + +// expected-remark@+1 {{applyFullConversion failed}} +builtin.module { +func.func @test_preorder_legalization() { + // expected-error@+1 {{failed to legalize operation 'test.post_order_legalization'}} + "test.post_order_legalization"() ({ + ^bb0(%arg0: i64): + // Not-explicitly-legal ops are not allowed to survive. + "test.remaining_consumer"(%arg0) : (i64) -> () + "test.invalid"(%arg0) : (i64) -> () + }) : () -> () + return +} +} diff --git a/mlir/test/Transforms/test-legalizer-rollback.mlir b/mlir/test/Transforms/test-legalizer-rollback.mlir index 71e11782e14b0..4bcca6b7e5228 100644 --- a/mlir/test/Transforms/test-legalizer-rollback.mlir +++ b/mlir/test/Transforms/test-legalizer-rollback.mlir @@ -163,3 +163,22 @@ func.func @create_unregistered_op_in_pattern() -> i32 { "test.return"(%0) : (i32) -> () } } + +// ----- + +// CHECK-LABEL: func @test_failed_preorder_legalization +// CHECK: "test.post_order_legalization"() ({ +// CHECK: %[[r:.*]] = "test.illegal_op_g"() : () -> i32 +// CHECK: "test.return"(%[[r]]) : (i32) -> () +// CHECK: }) : () -> () +// expected-remark @+1 {{applyPartialConversion failed}} +module { +func.func @test_failed_preorder_legalization() { + // expected-error @+1 {{failed to legalize operation 'test.post_order_legalization' that was explicitly marked illegal}} + "test.post_order_legalization"() ({ + %0 = "test.illegal_op_g"() : () -> (i32) + "test.return"(%0) : (i32) -> () + }) : () -> () + return +} +} diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index 7c43bb7bface0..88a71cc26ab0c 100644 --- a/mlir/test/Transforms/test-legalizer.mlir +++ b/mlir/test/Transforms/test-legalizer.mlir @@ -448,3 +448,35 @@ func.func @test_working_1to1_pattern(%arg0: f16) { "test.type_consumer"(%arg0) : (f16) -> () "test.return"() : () -> () } + +// ----- + +// The region of "test.post_order_legalization" is converted before the op. + +// CHECK: notifyBlockInserted into test.post_order_legalization: was unlinked +// CHECK: notifyOperationInserted: test.invalid +// CHECK: notifyBlockErased +// CHECK: notifyOperationInserted: test.valid, was unlinked +// CHECK: notifyOperationReplaced: test.invalid +// CHECK: notifyOperationErased: test.invalid +// CHECK: notifyOperationModified: test.post_order_legalization + +// CHECK-LABEL: func @test_preorder_legalization +// CHECK: "test.post_order_legalization"() ({ +// CHECK: ^{{.*}}(%[[arg0:.*]]: f64): +// Note: The survival of a not-explicitly-invalid operation does *not* cause +// a conversion failure in when applying a partial conversion. +// CHECK: %[[cast:.*]] = "test.cast"(%[[arg0]]) : (f64) -> i64 +// CHECK: "test.remaining_consumer"(%[[cast]]) : (i64) -> () +// CHECK: "test.valid"(%[[arg0]]) : (f64) -> () +// CHECK: }) {is_legal} : () -> () +func.func @test_preorder_legalization() { + "test.post_order_legalization"() ({ + ^bb0(%arg0: i64): + // expected-remark @+1 {{'test.remaining_consumer' is not legalizable}} + "test.remaining_consumer"(%arg0) : (i64) -> () + "test.invalid"(%arg0) : (i64) -> () + }) : () -> () + // expected-remark @+1 {{'func.return' is not legalizable}} + return +} diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 12edecc113495..9b64bc691588d 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -1418,6 +1418,22 @@ class TestTypeConsumerOpPattern } }; +class TestPostOrderLegalization : public ConversionPattern { +public: + TestPostOrderLegalization(MLIRContext *ctx, const TypeConverter &converter) + : ConversionPattern(converter, "test.post_order_legalization", 1, ctx) {} + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + for (Region &r : op->getRegions()) + if (failed(rewriter.legalize(&r))) + return failure(); + rewriter.modifyOpInPlace( + op, [&]() { op->setAttr("is_legal", rewriter.getUnitAttr()); }); + return success(); + } +}; + /// Test unambiguous overload resolution of replaceOpWithMultiple. This /// function is just to trigger compiler errors. It is never executed. [[maybe_unused]] void testReplaceOpWithMultipleOverloads( @@ -1532,7 +1548,8 @@ struct TestLegalizePatternDriver patterns.add(&getContext(), converter); + TestTypeConsumerOpPattern, TestPostOrderLegalization>( + &getContext(), converter); patterns.add(converter, &getContext()); mlir::populateAnyFunctionOpInterfaceTypeConversionPattern(patterns, converter); @@ -1560,6 +1577,9 @@ struct TestLegalizePatternDriver target.addDynamicallyLegalOp( OperationName("test.value_replace", &getContext()), [](Operation *op) { return op->hasAttr("is_legal"); }); + target.addDynamicallyLegalOp( + OperationName("test.post_order_legalization", &getContext()), + [](Operation *op) { return op->hasAttr("is_legal"); }); // TestCreateUnregisteredOp creates `arith.constant` operation, // which was not added to target intentionally to test From 6c640b86e6e03298385231cb7e77d2f3524bc643 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 5 Nov 2025 21:24:59 +0900 Subject: [PATCH 06/39] [mlir][LLVM] Fix unsupported FP lowering in `VectorConvertToLLVMPattern` (#166513) Fixes a bug in `VectorConvertToLLVMPattern`, which converted operations with unsupported FP types. E.g., `arith.addf ... : f4E2M1FN` was lowered to `llvm.fadd ... : i4`, which does not verify. There are a few more patterns that have the same bug. Those will be fixed in follow-up PRs. This commit is in preparation of adding an `APFloat`-based lowering for `arith` operations with unsupported floating-point types. --- .../Conversion/LLVMCommon/VectorPattern.h | 31 +++++++++++++++++++ .../Conversion/ArithToLLVM/arith-to-llvm.mlir | 26 ++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h index 964281592cc65..cad6cec761ab8 100644 --- a/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h @@ -92,12 +92,43 @@ class VectorConvertToLLVMPattern : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; using Super = VectorConvertToLLVMPattern; + /// Return the given type if it's a floating point type. If the given type is + /// a vector type, return its element type if it's a floating point type. + static FloatType getFloatingPointType(Type type) { + if (auto floatType = dyn_cast(type)) + return floatType; + if (auto vecType = dyn_cast(type)) + return dyn_cast(vecType.getElementType()); + return nullptr; + } + LogicalResult matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor, ConversionPatternRewriter &rewriter) const override { static_assert( std::is_base_of, SourceOp>::value, "expected single result op"); + + // The pattern should not apply if a floating-point operand is converted to + // a non-floating-point type. This indicates that the floating point type + // is not supported by the LLVM lowering. (Such types are converted to + // integers.) + auto checkType = [&](Value v) -> LogicalResult { + FloatType floatType = getFloatingPointType(v.getType()); + if (!floatType) + return success(); + Type convertedType = this->getTypeConverter()->convertType(floatType); + if (!isa_and_nonnull(convertedType)) + return rewriter.notifyMatchFailure(op, + "unsupported floating point type"); + return success(); + }; + for (Value operand : op->getOperands()) + if (failed(checkType(operand))) + return failure(); + if (failed(checkType(op->getResult(0)))) + return failure(); + // Determine attributes for the target op AttrConvert attrConvert(op); diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir index ba12ff29ebef9..b5dcb01d3dc6b 100644 --- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir +++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir @@ -747,3 +747,29 @@ func.func @memref_bitcast(%1: memref) -> memref { %2 = arith.bitcast %1 : memref to memref func.return %2 : memref } + +// ----- + +// CHECK-LABEL: func @unsupported_fp_type +// CHECK: arith.addf {{.*}} : f4E2M1FN +// CHECK: arith.addf {{.*}} : vector<4xf4E2M1FN> +// CHECK: arith.addf {{.*}} : vector<8x4xf4E2M1FN> +func.func @unsupported_fp_type(%arg0: f4E2M1FN, %arg1: vector<4xf4E2M1FN>, %arg2: vector<8x4xf4E2M1FN>) -> (f4E2M1FN, vector<4xf4E2M1FN>, vector<8x4xf4E2M1FN>) { + %0 = arith.addf %arg0, %arg0 : f4E2M1FN + %1 = arith.addf %arg1, %arg1 : vector<4xf4E2M1FN> + %2 = arith.addf %arg2, %arg2 : vector<8x4xf4E2M1FN> + return %0, %1, %2 : f4E2M1FN, vector<4xf4E2M1FN>, vector<8x4xf4E2M1FN> +} + +// ----- + +// CHECK-LABEL: func @supported_fp_type +// CHECK: llvm.fadd {{.*}} : f32 +// CHECK: llvm.fadd {{.*}} : vector<4xf32> +// CHECK-COUNT-4: llvm.fadd {{.*}} : vector<8xf32> +func.func @supported_fp_type(%arg0: f32, %arg1: vector<4xf32>, %arg2: vector<4x8xf32>) -> (f32, vector<4xf32>, vector<4x8xf32>) { + %0 = arith.addf %arg0, %arg0 : f32 + %1 = arith.addf %arg1, %arg1 : vector<4xf32> + %2 = arith.addf %arg2, %arg2 : vector<4x8xf32> + return %0, %1, %2 : f32, vector<4xf32>, vector<4x8xf32> +} From c782ed3440b5a1565428db9731504fd1c4c2a9a9 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Wed, 5 Nov 2025 13:31:10 +0100 Subject: [PATCH 07/39] [utils][UpdateLLCTestChecks] Add MIR support to update_llc_test_checks.py. (#164965) This change enables update_llc_test_checks.py to automatically generate MIR checks for RUN lines that use `-stop-before` or `-stop-after` flags allowing tests to verify intermediate compilation stages (e.g., after instruction selection but before peephole optimizations) alongside the final assembly output. If `-debug-only` flag is present in the run line it's considered as the main point of interest for testing and stop flags above are ignored (that is no MIR checks are generated). This resulted from the scenario, when I needed to test two instruction matching patterns where the later pattern in the peepholer reverts the earlier pattern in the instruction selector and distinguish it from the case when the earlier pattern didn't worked at all. Initially created by Claude Sonnet 4.5 it was improved later to handle conflicts in MIR <-> ASM prefixes and formatting. --- .../Inputs/x86_asm_mir_mixed.ll | 17 ++++ .../Inputs/x86_asm_mir_mixed.ll.expected | 45 +++++++++ .../Inputs/x86_asm_mir_same_prefix.ll | 13 +++ .../x86_asm_mir_same_prefix.ll.expected | 16 ++++ .../x86-asm-mir-mixed.test | 9 ++ .../x86-asm-mir-same-prefix.test | 7 ++ llvm/utils/UpdateTestChecks/common.py | 1 + llvm/utils/UpdateTestChecks/mir.py | 11 ++- llvm/utils/update_llc_test_checks.py | 94 +++++++++++++++---- 9 files changed, 194 insertions(+), 19 deletions(-) create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll new file mode 100644 index 0000000000000..292637177591f --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=MIR + +define i64 @test1(i64 %i) nounwind readnone { + %loc = alloca i64 + %j = load i64, ptr %loc + %r = add i64 %i, %j + ret i64 %r +} + +define i64 @test2(i32 %i) nounwind readnone { + %loc = alloca i32 + %j = load i32, ptr %loc + %r = add i32 %i, %j + %ext = zext i32 %r to i64 + ret i64 %ext +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected new file mode 100644 index 0000000000000..88cb03e85204a --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=MIR + +define i64 @test1(i64 %i) nounwind readnone { +; ASM-LABEL: test1: +; ASM: # %bb.0: +; ASM-NEXT: movq %rdi, %rax +; ASM-NEXT: addq -{{[0-9]+}}(%rsp), %rax +; ASM-NEXT: retq +; MIR-LABEL: name: test1 +; MIR: bb.0 (%ir-block.0): +; MIR-NEXT: liveins: $rdi +; MIR-NEXT: {{ $}} +; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi +; MIR-NEXT: [[ADD64rm:%[0-9]+]]:gr64 = ADD64rm [[COPY]], %stack.0.loc, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (dereferenceable load (s64) from %ir.loc) +; MIR-NEXT: $rax = COPY [[ADD64rm]] +; MIR-NEXT: RET 0, $rax + %loc = alloca i64 + %j = load i64, ptr %loc + %r = add i64 %i, %j + ret i64 %r +} + +define i64 @test2(i32 %i) nounwind readnone { +; ASM-LABEL: test2: +; ASM: # %bb.0: +; ASM-NEXT: movl %edi, %eax +; ASM-NEXT: addl -{{[0-9]+}}(%rsp), %eax +; ASM-NEXT: retq +; MIR-LABEL: name: test2 +; MIR: bb.0 (%ir-block.0): +; MIR-NEXT: liveins: $edi +; MIR-NEXT: {{ $}} +; MIR-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi +; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[COPY]], %stack.0.loc, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (dereferenceable load (s32) from %ir.loc) +; MIR-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[ADD32rm]], %subreg.sub_32bit +; MIR-NEXT: $rax = COPY [[SUBREG_TO_REG]] +; MIR-NEXT: RET 0, $rax + %loc = alloca i32 + %j = load i32, ptr %loc + %r = add i32 %i, %j + %ext = zext i32 %r to i64 + ret i64 %ext +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll new file mode 100644 index 0000000000000..7167bcf258e68 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK + +define i32 @add(i32 %a, i32 %b) { + %sum = add i32 %a, %b + ret i32 %sum +} + +define i32 @sub(i32 %a, i32 %b) { + %diff = sub i32 %a, %b + ret i32 %diff +} + diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected new file mode 100644 index 0000000000000..1ba920d1de8b0 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK + +define i32 @add(i32 %a, i32 %b) { + %sum = add i32 %a, %b + ret i32 %sum +} + +define i32 @sub(i32 %a, i32 %b) { + %diff = sub i32 %a, %b + ret i32 %diff +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test new file mode 100644 index 0000000000000..6fc57b583b37d --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test @@ -0,0 +1,9 @@ +# REQUIRES: x86-registered-target +## Test checking that update_llc_test_checks.py can generate both ASM and MIR checks in the same file + +# RUN: cp -f %S/Inputs/x86_asm_mir_mixed.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/x86_asm_mir_mixed.ll.expected %t.ll + +## Verify that running the script again on an already updated file doesn't add duplicate checks +# RUN: %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/x86_asm_mir_mixed.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test new file mode 100644 index 0000000000000..bb91a44678f1a --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test @@ -0,0 +1,7 @@ +## Test that using the same prefix for both ASM and MIR outputs generates a warning +## and doesn't produce any checks. + +# RUN: cp -f %S/Inputs/x86_asm_mir_same_prefix.ll %t.ll && %update_llc_test_checks %t.ll 2>&1 | FileCheck %s --check-prefix=WARNING +# RUN: diff -u %S/Inputs/x86_asm_mir_same_prefix.ll.expected %t.ll + +# WARNING: WARNING: The following prefixes are used for both ASM and MIR output, which will cause FileCheck failures: CHECK diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 2dad16a8eebb7..baa0377cd8b81 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -605,6 +605,7 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)") MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)") DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)") +STOP_PASS_RE = re.compile(r"-stop-(before|after)=(\w+)") IS_DEBUG_RECORD_RE = re.compile(r"^(\s+)#dbg_") IS_SWITCH_CASE_RE = re.compile(r"^\s+i\d+ \d+, label %\S+") diff --git a/llvm/utils/UpdateTestChecks/mir.py b/llvm/utils/UpdateTestChecks/mir.py index 24bb8b341d335..01ee0e19f7cb9 100644 --- a/llvm/utils/UpdateTestChecks/mir.py +++ b/llvm/utils/UpdateTestChecks/mir.py @@ -163,13 +163,15 @@ def add_mir_checks_for_function( print_fixed_stack, first_check_is_next, at_the_function_name, + check_indent=None, ): printed_prefixes = set() for run in run_list: for prefix in run[0]: if prefix in printed_prefixes: break - if not func_dict[prefix][func_name]: + # func_info can be empty if there was a prefix conflict. + if not func_dict[prefix].get(func_name): continue if printed_prefixes: # Add some space between different check prefixes. @@ -185,6 +187,7 @@ def add_mir_checks_for_function( func_dict[prefix][func_name], print_fixed_stack, first_check_is_next, + check_indent, ) break else: @@ -204,6 +207,7 @@ def add_mir_check_lines( func_info, print_fixed_stack, first_check_is_next, + check_indent=None, ): func_body = str(func_info).splitlines() if single_bb: @@ -220,7 +224,10 @@ def add_mir_check_lines( first_line = func_body[0] indent = len(first_line) - len(first_line.lstrip(" ")) # A check comment, indented the appropriate amount - check = "{:>{}}; {}".format("", indent, prefix) + if check_indent is not None: + check = "{}; {}".format(check_indent, prefix) + else: + check = "{:>{}}; {}".format("", indent, prefix) output_lines.append("{}-LABEL: name: {}".format(check, func_name)) diff --git a/llvm/utils/update_llc_test_checks.py b/llvm/utils/update_llc_test_checks.py index 8c57e75f34f75..98864be62875b 100755 --- a/llvm/utils/update_llc_test_checks.py +++ b/llvm/utils/update_llc_test_checks.py @@ -15,7 +15,7 @@ import os # Used to advertise this file's name ("autogenerated_note"). import sys -from UpdateTestChecks import common +from UpdateTestChecks import common, mir # llc is the only llc-like in the LLVM tree but downstream forks can add # additional ones here if they have them. @@ -33,6 +33,7 @@ def update_test(ti: common.TestInfo): break run_list = [] + mir_run_list = [] for l in ti.run_lines: if "|" not in l: common.warn("Skipping unparsable RUN line: " + l) @@ -57,9 +58,14 @@ def update_test(ti: common.TestInfo): if m: march_in_cmd = m.groups()[0] + target_list = run_list m = common.DEBUG_ONLY_ARG_RE.search(llc_cmd) if m and m.groups()[0] == "isel": from UpdateTestChecks import isel as output_type + elif not m and common.STOP_PASS_RE.search(llc_cmd): + # MIR output mode. If -debug-only is present assume + # the debug output is the main point of interest. + target_list = mir_run_list else: from UpdateTestChecks import asm as output_type @@ -84,7 +90,7 @@ def update_test(ti: common.TestInfo): # FIXME: We should use multiple check prefixes to common check lines. For # now, we just ignore all but the last. - run_list.append( + target_list.append( ( check_prefixes, llc_tool, @@ -119,14 +125,20 @@ def update_test(ti: common.TestInfo): ginfo=ginfo, ) - for ( - prefixes, - llc_tool, - llc_args, - preprocess_cmd, - triple_in_cmd, - march_in_cmd, - ) in run_list: + # Dictionary to store MIR function bodies separately + mir_func_dict = {} + for run_tuple, is_mir in [(run, False) for run in run_list] + [ + (run, True) for run in mir_run_list + ]: + ( + prefixes, + llc_tool, + llc_args, + preprocess_cmd, + triple_in_cmd, + march_in_cmd, + ) = run_tuple + common.debug("Extracted LLC cmd:", llc_tool, llc_args) common.debug("Extracted FileCheck prefixes:", str(prefixes)) @@ -141,22 +153,54 @@ def update_test(ti: common.TestInfo): if not triple: triple = common.get_triple_from_march(march_in_cmd) - scrubber, function_re = output_type.get_run_handler(triple) - if 0 == builder.process_run_line( - function_re, scrubber, raw_tool_output, prefixes - ): - common.warn( - "Couldn't match any function. Possibly the wrong target triple has been provided" + if is_mir: + # MIR output mode + common.debug("Detected MIR output mode for prefixes:", str(prefixes)) + for prefix in prefixes: + if prefix not in mir_func_dict: + mir_func_dict[prefix] = {} + + mir.build_function_info_dictionary( + ti.path, + raw_tool_output, + triple, + prefixes, + mir_func_dict, + ti.args.verbose, ) - builder.processed_prefixes(prefixes) + else: + # ASM output mode + scrubber, function_re = output_type.get_run_handler(triple) + if 0 == builder.process_run_line( + function_re, scrubber, raw_tool_output, prefixes + ): + common.warn( + "Couldn't match any function. Possibly the wrong target triple has been provided" + ) + builder.processed_prefixes(prefixes) func_dict = builder.finish_and_get_func_dict() + + # Check for conflicts: same prefix used for both ASM and MIR + conflicting_prefixes = set(func_dict.keys()) & set(mir_func_dict.keys()) + if conflicting_prefixes: + common.warn( + "The following prefixes are used for both ASM and MIR output, which will cause FileCheck failures: {}".format( + ", ".join(sorted(conflicting_prefixes)) + ), + test_file=ti.path, + ) + for prefix in conflicting_prefixes: + mir_func_dict[prefix] = {} + func_dict[prefix] = {} + global_vars_seen_dict = {} is_in_function = False is_in_function_start = False func_name = None prefix_set = set([prefix for p in run_list for prefix in p[0]]) + prefix_set.update([prefix for p in mir_run_list for prefix in p[0]]) common.debug("Rewriting FileCheck prefixes:", str(prefix_set)) output_lines = [] @@ -221,6 +265,22 @@ def update_test(ti: common.TestInfo): is_filtered=builder.is_filtered(), ) ) + + # Also add MIR checks if we have them for this function + if mir_run_list and func_name: + mir.add_mir_checks_for_function( + ti.path, + output_lines, + mir_run_list, + mir_func_dict, + func_name, + single_bb=False, # Don't skip basic block labels. + print_fixed_stack=False, # Don't print fixed stack (ASM tests don't need it). + first_check_is_next=False, # First check is LABEL, not NEXT. + at_the_function_name=False, # Use "name:" not "@name". + check_indent="", # No indentation for IR files (not MIR files). + ) + is_in_function_start = False if is_in_function: From a4105707eeaf53c13c2f09298d762995267c7717 Mon Sep 17 00:00:00 2001 From: hev Date: Wed, 5 Nov 2025 20:36:38 +0800 Subject: [PATCH 08/39] [llvm][LoongArch] Introduce LASX and LSX conversion intrinsics (#157818) This patch introduces the LASX and LSX conversion intrinsics: - <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>) - <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>) - <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>) - <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>) - <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>) - <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>) - <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>) - <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>) - <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>) - <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>) - <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>) - <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>) - <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>) - <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>) - <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>) - <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>) - <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>) - <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>) --- llvm/include/llvm/IR/IntrinsicsLoongArch.td | 38 +++ .../LoongArch/LoongArchISelLowering.cpp | 5 + .../LoongArch/LoongArchLASXInstrInfo.td | 31 ++ .../LoongArch/lasx/intrinsic-conversion.ll | 303 ++++++++++++++++++ 4 files changed, 377 insertions(+) create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td index 84026aa9d3624..1c46965d995fe 100644 --- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td +++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td @@ -1192,4 +1192,42 @@ def int_loongarch_lasx_xvstelm_w def int_loongarch_lasx_xvstelm_d : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; + +// LASX and LSX conversion +def int_loongarch_lasx_cast_128_s + : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_cast_128_d + : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_cast_128 + : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_concat_128_s + : VecInt<[llvm_v8f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_concat_128_d + : VecInt<[llvm_v4f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_concat_128 + : VecInt<[llvm_v4i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_lo_s + : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_lo_d + : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_lo + : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_hi_s + : VecInt<[llvm_v4f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_hi_d + : VecInt<[llvm_v2f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_extract_128_hi + : VecInt<[llvm_v2i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_lo_s + : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_lo_d + : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_lo + : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_hi_s + : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_hi_d + : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lasx_insert_128_hi + : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty], [IntrNoMem]>; } // TargetPrefix = "loongarch" diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index fe700e17d341b..cf4ffc82f6009 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -6630,6 +6630,11 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0), N->getOperand(1)); break; + case Intrinsic::loongarch_lasx_concat_128_s: + case Intrinsic::loongarch_lasx_concat_128_d: + case Intrinsic::loongarch_lasx_concat_128: + return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), + N->getOperand(1), N->getOperand(2)); } return SDValue(); } diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index b502b056c4cdf..00d52870f1727 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -2113,6 +2113,37 @@ defm : subvector_subreg_lowering; defm : subvector_subreg_lowering; defm : subvector_subreg_lowering; +// LASX and LSX conversion +def : Pat<(int_loongarch_lasx_cast_128_s (v4f32 LSX128:$src)), + (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_cast_128_d (v2f64 LSX128:$src)), + (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_cast_128 (v2i64 LSX128:$src)), + (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_lo_s (v8f32 LASX256:$src)), + (EXTRACT_SUBREG LASX256:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_lo_d (v4f64 LASX256:$src)), + (EXTRACT_SUBREG LASX256:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_lo (v4i64 LASX256:$src)), + (EXTRACT_SUBREG LASX256:$src, sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_hi_s (v8f32 LASX256:$src)), + (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_hi_d (v4f64 LASX256:$src)), + (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>; +def : Pat<(int_loongarch_lasx_extract_128_hi (v4i64 LASX256:$src)), + (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), LASX256:$src, 1), sub_128)>; +def : Pat<(int_loongarch_lasx_insert_128_lo_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>; +def : Pat<(int_loongarch_lasx_insert_128_lo_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>; +def : Pat<(int_loongarch_lasx_insert_128_lo (v4i64 LASX256:$src), (v2i64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 48)>; +def : Pat<(int_loongarch_lasx_insert_128_hi_s (v8f32 LASX256:$src), (v4f32 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>; +def : Pat<(int_loongarch_lasx_insert_128_hi_d (v4f64 LASX256:$src), (v2f64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>; +def : Pat<(int_loongarch_lasx_insert_128_hi (v4i64 LASX256:$src), (v2i64 LSX128:$lo)), + (XVPERMI_Q LASX256:$src, (INSERT_SUBREG (IMPLICIT_DEF), LSX128:$lo, sub_128), 2)>; } // Predicates = [HasExtLASX] /// Intrinsic pattern diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll new file mode 100644 index 0000000000000..006713ccabf47 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-conversion.ll @@ -0,0 +1,303 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float>) + +define void @lasx_cast_128_s(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_cast_128_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x float>, ptr %va + %b = call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> %a) + store <8 x float> %b, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double>) + +define void @lasx_cast_128_d(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_cast_128_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x double>, ptr %va + %b = call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> %a) + store <4 x double> %b, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64>) + +define void @lasx_cast_128(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_cast_128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x i64>, ptr %va + %b = call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> %a) + store <4 x i64> %b, ptr %vd + ret void +} + +declare <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float>, <4 x float>) + +define void @lasx_concat_128_s(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_concat_128_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x float>, ptr %va + %b = load <4 x float>, ptr %vb + %c = call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> %a, <4 x float> %b) + store <8 x float> %c, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double>, <2 x double>) + +define void @lasx_concat_128_d(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_concat_128_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x double>, ptr %va + %b = load <2 x double>, ptr %vb + %c = call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> %a, <2 x double> %b) + store <4 x double> %c, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64>, <2 x i64>) + +define void @lasx_concat_128(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_concat_128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <2 x i64>, ptr %va + %b = load <2 x i64>, ptr %vb + %c = call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> %a, <2 x i64> %b) + store <4 x i64> %c, ptr %vd + ret void +} + +declare <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float>) + +define void @lasx_extract_128_lo_s(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_lo_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %c = call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> %a) + store <4 x float> %c, ptr %vd + ret void +} + +declare <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double>) + +define void @lasx_extract_128_lo_d(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_lo_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x double>, ptr %va + %c = call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> %a) + store <2 x double> %c, ptr %vd + ret void +} + +declare <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64>) + +define void @lasx_extract_128_lo(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_lo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> %a) + store <2 x i64> %c, ptr %vd + ret void +} + +declare <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float>) + +define void @lasx_extract_128_hi_s(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_hi_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %c = call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> %a) + store <4 x float> %c, ptr %vd + ret void +} + +declare <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double>) + +define void @lasx_extract_128_hi_d(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_hi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x double>, ptr %va + %c = call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> %a) + store <2 x double> %c, ptr %vd + ret void +} + +declare <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64>) + +define void @lasx_extract_128_hi(ptr %vd, ptr %va) { +; CHECK-LABEL: lasx_extract_128_hi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %c = call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> %a) + store <2 x i64> %c, ptr %vd + ret void +} + +declare <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float>, <4 x float>) + +define void @lasx_insert_128_lo_s(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_lo_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %b = load <4 x float>, ptr %vb + %c = call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> %a, <4 x float> %b) + store <8 x float> %c, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double>, <2 x double>) + +define void @lasx_insert_128_lo_d(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_lo_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x double>, ptr %va + %b = load <2 x double>, ptr %vb + %c = call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> %a, <2 x double> %b) + store <4 x double> %c, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64>, <2 x i64>) + +define void @lasx_insert_128_lo(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_lo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %b = load <2 x i64>, ptr %vb + %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> %a, <2 x i64> %b) + store <4 x i64> %c, ptr %vd + ret void +} + +declare <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float>, <4 x float>) + +define void @lasx_insert_128_hi_s(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_hi_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <8 x float>, ptr %va + %b = load <4 x float>, ptr %vb + %c = call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> %a, <4 x float> %b) + store <8 x float> %c, ptr %vd + ret void +} + +declare <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double>, <2 x double>) + +define void @lasx_insert_128_hi_d(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_hi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x double>, ptr %va + %b = load <2 x double>, ptr %vb + %c = call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> %a, <2 x double> %b) + store <4 x double> %c, ptr %vd + ret void +} + +declare <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64>, <2 x i64>) + +define void @lasx_insert_128_hi(ptr %vd, ptr %va, ptr %vb) { +; CHECK-LABEL: lasx_insert_128_hi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %a = load <4 x i64>, ptr %va + %b = load <2 x i64>, ptr %vb + %c = call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> %a, <2 x i64> %b) + store <4 x i64> %c, ptr %vd + ret void +} From 04c01f066032c92cbd48b9c26cf97426f8f38058 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 5 Nov 2025 07:39:42 -0500 Subject: [PATCH 09/39] [gn] port 0c7300923638403 --- .../gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn index 2ece91331c5d8..11a57fcb008cd 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn @@ -12,10 +12,17 @@ tablegen("WebAssemblyGenFastISel") { td_file = "WebAssembly.td" } +tablegen("WebAssemblyGenSDNodeInfo") { + visibility = [ ":LLVMWebAssemblyCodeGen" ] + args = [ "-gen-sd-node-info" ] + td_file = "WebAssembly.td" +} + static_library("LLVMWebAssemblyCodeGen") { deps = [ ":WebAssemblyGenDAGISel", ":WebAssemblyGenFastISel", + ":WebAssemblyGenSDNodeInfo", "MCTargetDesc", "TargetInfo", "//llvm/include/llvm/Config:llvm-config", From 0d77cba6e165283283431f0d9566d0e0e06e63d7 Mon Sep 17 00:00:00 2001 From: Muhammad Bassiouni <60100307+bassiounix@users.noreply.github.com> Date: Wed, 5 Nov 2025 14:50:40 +0200 Subject: [PATCH 10/39] [libc][math] Refactor exp2m1f16 implementation to header-only in src/__support/math folder. (#162019) Part of #147386 in preparation for: https://discourse.llvm.org/t/rfc-make-clang-builtin-math-functions-constexpr-with-llvm-libc-to-support-c-23-constexpr-math-functions/86450 --- libc/shared/math.h | 1 + libc/shared/math/exp2m1f16.h | 29 +++ libc/src/__support/math/CMakeLists.txt | 18 ++ libc/src/__support/math/exp2m1f16.h | 180 ++++++++++++++++++ libc/src/math/generic/CMakeLists.txt | 14 +- libc/src/math/generic/exp2m1f16.cpp | 155 +-------------- libc/test/shared/CMakeLists.txt | 1 + libc/test/shared/shared_math_test.cpp | 1 + .../llvm-project-overlay/libc/BUILD.bazel | 18 +- 9 files changed, 250 insertions(+), 167 deletions(-) create mode 100644 libc/shared/math/exp2m1f16.h create mode 100644 libc/src/__support/math/exp2m1f16.h diff --git a/libc/shared/math.h b/libc/shared/math.h index bd6aee73c3933..282dd6243d6a7 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -51,6 +51,7 @@ #include "math/exp2f.h" #include "math/exp2f16.h" #include "math/exp2m1f.h" +#include "math/exp2m1f16.h" #include "math/expf.h" #include "math/expf16.h" #include "math/frexpf.h" diff --git a/libc/shared/math/exp2m1f16.h b/libc/shared/math/exp2m1f16.h new file mode 100644 index 0000000000000..96a404708be18 --- /dev/null +++ b/libc/shared/math/exp2m1f16.h @@ -0,0 +1,29 @@ +//===-- Shared exp2m1f16 function -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_EXP2M1F16_H +#define LLVM_LIBC_SHARED_MATH_EXP2M1F16_H + +#include "include/llvm-libc-macros/float16-macros.h" +#include "shared/libc_common.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/math/exp2m1f16.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::exp2m1f16; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_EXP2M1F16_H diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 620900028d424..ddc0159b10ce4 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -769,6 +769,24 @@ add_header_library( libc.src.__support.macros.properties.cpu_features ) +add_header_library( + exp2m1f16 + HDRS + exp2m1f16.h + DEPENDS + .expxf16_utils + libc.src.__support.common + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.cpu_features +) + add_header_library( exp10 HDRS diff --git a/libc/src/__support/math/exp2m1f16.h b/libc/src/__support/math/exp2m1f16.h new file mode 100644 index 0000000000000..0424af4aa953d --- /dev/null +++ b/libc/src/__support/math/exp2m1f16.h @@ -0,0 +1,180 @@ +//===-- Implementation header for exp2m1f16 ----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" +#include "src/__support/macros/properties/cpu_features.h" +#include "src/__support/math/expxf16_utils.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float16 exp2m1f16(float16 x) { +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + constexpr fputil::ExceptValues EXP2M1F16_EXCEPTS_LO = {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ) + {0x0b3dU, 0x0904U, 1U, 0U, 1U}, + // x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ) + {0x0d3fU, 0x0b45U, 1U, 0U, 1U}, + // x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ) + {0x118cU, 0x0fb1U, 1U, 0U, 0U}, + // x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ) + {0x21bcU, 0x1ffaU, 1U, 0U, 1U}, + // x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ) + {0x9718U, 0x94eaU, 0U, 1U, 0U}, + // x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ) + {0x973fU, 0x9505U, 0U, 1U, 0U}, + }}; + +#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT + constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6; +#else + constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7; +#endif + + constexpr fputil::ExceptValues + EXP2M1F16_EXCEPTS_HI = {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ) + {0x3396U, 0x31b7U, 1U, 0U, 0U}, +#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT + // x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ) + {0x34baU, 0x3345U, 1U, 0U, 0U}, +#endif + // x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ) + {0x36b6U, 0x3566U, 1U, 0U, 0U}, +#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT + // x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ) + {0x37b7U, 0x3659U, 1U, 0U, 1U}, +#endif + // x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ) + {0xb201U, 0xafcdU, 0U, 1U, 1U}, + // x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ) + {0xb3ccU, 0xb0f9U, 0U, 1U, 0U}, + // x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ) + {0xb8a5U, 0xb54cU, 0U, 1U, 1U}, +#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT + // x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ) + {0xba8dU, 0xb6edU, 0U, 1U, 1U}, +#endif + }}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + using namespace math::expxf16_internal; + using FPBits = fputil::FPBits; + FPBits x_bits(x); + + uint16_t x_u = x_bits.uintval(); + uint16_t x_abs = x_u & 0x7fffU; + + // When |x| <= 2^(-3), or |x| >= 11, or x is NaN. + if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x4980U)) { + // exp2m1(NaN) = NaN + if (x_bits.is_nan()) { + if (x_bits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // When x >= 16. + if (x_u >= 0x4c00 && x_bits.is_pos()) { + // exp2m1(+inf) = +inf + if (x_bits.is_inf()) + return FPBits::inf().get_val(); + + switch (fputil::quick_get_round()) { + case FE_TONEAREST: + case FE_UPWARD: + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT); + return FPBits::inf().get_val(); + default: + return FPBits::max_normal().get_val(); + } + } + + // When x < -11. + if (x_u > 0xc980U) { + // exp2m1(-inf) = -1 + if (x_bits.is_inf()) + return FPBits::one(Sign::NEG).get_val(); + + // When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1. + if (x_u < 0xca00U) + return fputil::round_result_slightly_down( + fputil::cast(-0x1.ffcp-1)); + + // When x <= -12, round(2^x - 1, HP, RN) = -1. + switch (fputil::quick_get_round()) { + case FE_TONEAREST: + case FE_DOWNWARD: + return FPBits::one(Sign::NEG).get_val(); + default: + return fputil::cast(-0x1.ffcp-1); + } + } + + // When |x| <= 2^(-3). + if (x_abs <= 0x3000U) { +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u); + LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + float xf = x; + // Degree-5 minimax polynomial generated by Sollya with the following + // commands: + // > display = hexadecimal; + // > P = fpminimax((2^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]); + // > x * P; + return fputil::cast( + xf * fputil::polyeval(xf, 0x1.62e43p-1f, 0x1.ebfbdep-3f, + 0x1.c6af88p-5f, 0x1.3b45d6p-7f, + 0x1.641e7cp-10f)); + } + } + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // exp2(x) = exp2(hi + mid) * exp2(lo) + auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x); + // exp2m1(x) = exp2(hi + mid) * exp2(lo) - 1 + return fputil::cast( + fputil::multiply_add(exp2_hi_mid, exp2_lo, -1.0f)); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index c048a64db6bc2..e71300536616b 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1498,19 +1498,7 @@ add_entrypoint_object( HDRS ../exp2m1f16.h DEPENDS - libc.hdr.errno_macros - libc.hdr.fenv_macros - libc.src.__support.common - libc.src.__support.FPUtil.cast - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.cpu_features - libc.src.__support.math.expxf16_utils + libc.src.__support.math.exp2m1f16 ) add_entrypoint_object( diff --git a/libc/src/math/generic/exp2m1f16.cpp b/libc/src/math/generic/exp2m1f16.cpp index ce0cc60748f19..497a2887cea4c 100644 --- a/libc/src/math/generic/exp2m1f16.cpp +++ b/libc/src/math/generic/exp2m1f16.cpp @@ -7,163 +7,12 @@ //===----------------------------------------------------------------------===// #include "src/math/exp2m1f16.h" -#include "hdr/errno_macros.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" -#include "src/__support/macros/properties/cpu_features.h" -#include "src/__support/math/expxf16_utils.h" +#include "src/__support/math/exp2m1f16.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr fputil::ExceptValues EXP2M1F16_EXCEPTS_LO = {{ - // (input, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ) - {0x0b3dU, 0x0904U, 1U, 0U, 1U}, - // x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ) - {0x0d3fU, 0x0b45U, 1U, 0U, 1U}, - // x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ) - {0x118cU, 0x0fb1U, 1U, 0U, 0U}, - // x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ) - {0x21bcU, 0x1ffaU, 1U, 0U, 1U}, - // x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ) - {0x9718U, 0x94eaU, 0U, 1U, 0U}, - // x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ) - {0x973fU, 0x9505U, 0U, 1U, 0U}, -}}; - -#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT -static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6; -#else -static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7; -#endif - -static constexpr fputil::ExceptValues - EXP2M1F16_EXCEPTS_HI = {{ - // (input, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ) - {0x3396U, 0x31b7U, 1U, 0U, 0U}, -#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT - // x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ) - {0x34baU, 0x3345U, 1U, 0U, 0U}, -#endif - // x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ) - {0x36b6U, 0x3566U, 1U, 0U, 0U}, -#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT - // x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ) - {0x37b7U, 0x3659U, 1U, 0U, 1U}, -#endif - // x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ) - {0xb201U, 0xafcdU, 0U, 1U, 1U}, - // x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ) - {0xb3ccU, 0xb0f9U, 0U, 1U, 0U}, - // x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ) - {0xb8a5U, 0xb54cU, 0U, 1U, 1U}, -#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT - // x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ) - {0xba8dU, 0xb6edU, 0U, 1U, 1U}, -#endif - }}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - LLVM_LIBC_FUNCTION(float16, exp2m1f16, (float16 x)) { - using namespace math::expxf16_internal; - using FPBits = fputil::FPBits; - FPBits x_bits(x); - - uint16_t x_u = x_bits.uintval(); - uint16_t x_abs = x_u & 0x7fffU; - - // When |x| <= 2^(-3), or |x| >= 11, or x is NaN. - if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x4980U)) { - // exp2m1(NaN) = NaN - if (x_bits.is_nan()) { - if (x_bits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - return x; - } - - // When x >= 16. - if (x_u >= 0x4c00 && x_bits.is_pos()) { - // exp2m1(+inf) = +inf - if (x_bits.is_inf()) - return FPBits::inf().get_val(); - - switch (fputil::quick_get_round()) { - case FE_TONEAREST: - case FE_UPWARD: - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT); - return FPBits::inf().get_val(); - default: - return FPBits::max_normal().get_val(); - } - } - - // When x < -11. - if (x_u > 0xc980U) { - // exp2m1(-inf) = -1 - if (x_bits.is_inf()) - return FPBits::one(Sign::NEG).get_val(); - - // When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1. - if (x_u < 0xca00U) - return fputil::round_result_slightly_down( - fputil::cast(-0x1.ffcp-1)); - - // When x <= -12, round(2^x - 1, HP, RN) = -1. - switch (fputil::quick_get_round()) { - case FE_TONEAREST: - case FE_DOWNWARD: - return FPBits::one(Sign::NEG).get_val(); - default: - return fputil::cast(-0x1.ffcp-1); - } - } - - // When |x| <= 2^(-3). - if (x_abs <= 0x3000U) { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u); - LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - float xf = x; - // Degree-5 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > P = fpminimax((2^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]); - // > x * P; - return fputil::cast( - xf * fputil::polyeval(xf, 0x1.62e43p-1f, 0x1.ebfbdep-3f, - 0x1.c6af88p-5f, 0x1.3b45d6p-7f, - 0x1.641e7cp-10f)); - } - } - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // exp2(x) = exp2(hi + mid) * exp2(lo) - auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x); - // exp2m1(x) = exp2(hi + mid) * exp2(lo) - 1 - return fputil::cast( - fputil::multiply_add(exp2_hi_mid, exp2_lo, -1.0f)); + return math::exp2m1f16(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt index aede395350821..762b5b0417ef6 100644 --- a/libc/test/shared/CMakeLists.txt +++ b/libc/test/shared/CMakeLists.txt @@ -44,6 +44,7 @@ add_fp_unittest( libc.src.__support.math.exp2f libc.src.__support.math.exp2f16 libc.src.__support.math.exp2m1f + libc.src.__support.math.exp2m1f16 libc.src.__support.math.exp10 libc.src.__support.math.exp10f libc.src.__support.math.exp10f16 diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp index a6825a10654c9..5b409781a5b07 100644 --- a/libc/test/shared/shared_math_test.cpp +++ b/libc/test/shared/shared_math_test.cpp @@ -29,6 +29,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) { EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp10f16(0.0f16)); EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp10m1f16(0.0f16)); EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::exp2f16(0.0f16)); + EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::exp2m1f16(0.0f16)); EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::expf16(0.0f16)); ASSERT_FP_EQ(float16(8 << 5), LIBC_NAMESPACE::shared::ldexpf16(8.0f16, 5)); diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 5a1e0b53b021c..8d225d63cdf3e 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -2964,6 +2964,22 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_math_exp2m1f16", + hdrs = ["src/__support/math/exp2m1f16.h"], + deps = [ + ":__support_fputil_except_value_utils", + ":__support_fputil_fma", + ":__support_fputil_multiply_add", + ":__support_fputil_nearest_integer", + ":__support_fputil_polyeval", + ":__support_fputil_rounding_mode", + ":__support_macros_optimization", + ":__support_math_common_constants", + ":__support_math_expxf16_utils", + ], +) + libc_support_library( name = "__support_math_exp10", hdrs = ["src/__support/math/exp10.h"], @@ -3762,7 +3778,7 @@ libc_math_function( libc_math_function( name = "exp2m1f16", additional_deps = [ - ":__support_math_expxf16_utils", + ":__support_math_exp2m1f16", ], ) From 98ca2e85450c5df63e5407c0579420518d808596 Mon Sep 17 00:00:00 2001 From: mitchell Date: Wed, 5 Nov 2025 20:51:22 +0800 Subject: [PATCH 11/39] [clang-tidy][NFC] Fix broken link in `bugprone-default-operator-new-on-overaligned-type` (#166546) --- clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst index cc0c729aaacdc..b359d85ad0cdc 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst @@ -4,7 +4,7 @@ cert-mem57-cpp ============== The `cert-mem57-cpp` is an aliaes, please see -`bugprone-default-operator-new-on-overaligned-type <../bugprone/default-operator-new-on-overaligned-type>`_ +`bugprone-default-operator-new-on-overaligned-type <../bugprone/default-operator-new-on-overaligned-type.html>`_ for more information. This check corresponds to the CERT C++ Coding Standard rule From a389472e0b6386a30fe2a078344b31f3be99bdca Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 5 Nov 2025 13:58:24 +0100 Subject: [PATCH 12/39] [Clang][NFC] Refactor SemaCXX/dllexport.cpp to use -verify= instead of macros (#165855) --- clang/test/SemaCXX/dllexport.cpp | 180 ++++++++++--------------------- 1 file changed, 55 insertions(+), 125 deletions(-) diff --git a/clang/test/SemaCXX/dllexport.cpp b/clang/test/SemaCXX/dllexport.cpp index f503e2fc311d1..169af5cacc6c7 100644 --- a/clang/test/SemaCXX/dllexport.cpp +++ b/clang/test/SemaCXX/dllexport.cpp @@ -1,13 +1,13 @@ -// RUN: %clang_cc1 -triple i686-win32 -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DMS %s -// RUN: %clang_cc1 -triple x86_64-win32 -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DMS %s -// RUN: %clang_cc1 -triple i686-mingw32 -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple x86_64-mingw32 -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple i686-pc-cygwin -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple i686-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DWI %s -// RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DWI %s -// RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fdeclspec -verify -std=c++11 -Wunsupported-dll-base-class-template -DPS %s -// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fsyntax-only -fdeclspec -verify -std=c++1y -Wunsupported-dll-base-class-template -DPS %s +// RUN: %clang_cc1 -triple i686-win32 -fsyntax-only -fms-extensions -verify=expected,ms,non-gnu,ms-ps -std=c++11 -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-win32 -fsyntax-only -fms-extensions -verify=expected,ms,non-gnu,ms-ps -std=c++1y -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple i686-mingw32 -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu -std=c++1y -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-mingw32 -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu -std=c++11 -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple i686-pc-cygwin -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu -std=c++1y -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-pc-cygwin -fsyntax-only -fms-extensions -verify=expected,non-ms,gnu,win-gnu -std=c++11 -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple i686-windows-itanium -fsyntax-only -fms-extensions -verify=expected,non-ms,non-gnu,win-gnu -std=c++11 -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify=expected,non-ms,non-gnu,win-gnu -std=c++1y -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fdeclspec -verify=expected,non-ms,non-gnu,ms-ps -std=c++11 -Wunsupported-dll-base-class-template %s +// RUN: %clang_cc1 -triple x86_64-sie-ps5 -fsyntax-only -fdeclspec -verify=expected,non-ms,non-gnu,ms-ps -std=c++1y -Wunsupported-dll-base-class-template %s // Helper structs to make templates more expressive. struct ImplicitInst_Exported {}; @@ -75,9 +75,7 @@ __declspec(dllexport) extern int GlobalRedecl4; // expected-warning{{redeclarati // External linkage is required. __declspec(dllexport) static int StaticGlobal; // expected-error{{'StaticGlobal' must have external linkage when declared 'dllexport'}} __declspec(dllexport) Internal InternalTypeGlobal; // expected-error{{'InternalTypeGlobal' must have external linkage when declared 'dllexport'}} -#ifndef MS -namespace { __declspec(dllexport) int InternalGlobal; } // expected-error{{'(anonymous namespace)::InternalGlobal' must have external linkage when declared 'dllexport'}} -#endif +namespace { __declspec(dllexport) int InternalGlobal; } // non-ms-error{{'(anonymous namespace)::InternalGlobal' must have external linkage when declared 'dllexport'}} namespace ns { __declspec(dllexport) int ExternalGlobal; } __declspec(dllexport) auto InternalAutoTypeGlobal = Internal(); // expected-error{{'InternalAutoTypeGlobal' must have external linkage when declared 'dllexport'}} @@ -132,9 +130,7 @@ template __declspec(dllexport) extern int VarTmplRedecl3; // expecte // External linkage is required. template __declspec(dllexport) static int StaticVarTmpl; // expected-error{{'StaticVarTmpl' must have external linkage when declared 'dllexport'}} template __declspec(dllexport) Internal InternalTypeVarTmpl; // expected-error{{'InternalTypeVarTmpl' must have external linkage when declared 'dllexport'}} -#ifndef MS -namespace { template __declspec(dllexport) int InternalVarTmpl; } // expected-error{{'(anonymous namespace)::InternalVarTmpl' must have external linkage when declared 'dllexport'}} -#endif +namespace { template __declspec(dllexport) int InternalVarTmpl; } // non-ms-error{{'(anonymous namespace)::InternalVarTmpl' must have external linkage when declared 'dllexport'}} namespace ns { template __declspec(dllexport) int ExternalVarTmpl = 1; } template __declspec(dllexport) auto InternalAutoTypeVarTmpl = Internal(); // expected-error{{'InternalAutoTypeVarTmpl' must have external linkage when declared 'dllexport'}} @@ -355,11 +351,8 @@ class __declspec(dllexport) ClassDecl; class __declspec(dllexport) ClassDef {}; -#if defined(MS) || defined (WI) || defined(PS) -// expected-warning@+3{{'dllexport' attribute ignored}} -#endif template struct PartiallySpecializedClassTemplate {}; -template struct __declspec(dllexport) PartiallySpecializedClassTemplate { void f() {} }; +template struct __declspec(dllexport) PartiallySpecializedClassTemplate { void f() {} }; // non-gnu-warning {{'dllexport' attribute ignored}} template struct ExpliciallySpecializedClassTemplate {}; template <> struct __declspec(dllexport) ExpliciallySpecializedClassTemplate { void f() {} }; @@ -373,16 +366,11 @@ ImplicitlyInstantiatedExportedTemplate implicitlyInstantiatedExp // Don't instantiate class members of templates with explicit instantiation declarations, even if they are exported. struct IncompleteType2; -#if defined(MS) || defined (WI) || defined(PS) -// expected-note@+2{{attribute is here}} -#endif -template struct __declspec(dllexport) ExportedTemplateWithExplicitInstantiationDecl { + +template struct __declspec(dllexport) ExportedTemplateWithExplicitInstantiationDecl { // non-gnu-note {{attribute is here}} int f() { return sizeof(T); } // no-error }; -#if defined(MS) || defined (WI) || defined(PS) -// expected-warning@+2{{explicit instantiation declaration should not be 'dllexport'}} -#endif -extern template struct ExportedTemplateWithExplicitInstantiationDecl; +extern template struct ExportedTemplateWithExplicitInstantiationDecl; // non-gnu-warning {{explicit instantiation declaration should not be 'dllexport'}} // Instantiate class members for explicitly instantiated exported templates. struct IncompleteType3; // expected-note{{forward declaration of 'IncompleteType3'}} @@ -392,16 +380,9 @@ template struct __declspec(dllexport) ExplicitlyInstantiatedExporte template struct ExplicitlyInstantiatedExportedTemplate; // expected-note{{in instantiation of member function 'ExplicitlyInstantiatedExportedTemplate::f' requested here}} // In MS mode, instantiate members of class templates that are base classes of exported classes. -#if defined(MS) || defined(PS) - // expected-note@+3{{forward declaration of 'IncompleteType4'}} - // expected-note@+3{{in instantiation of member function 'BaseClassTemplateOfExportedClass::f' requested here}} -#endif -struct IncompleteType4; -template struct BaseClassTemplateOfExportedClass { -#if defined(MS) || defined(PS) - // expected-error@+2{{invalid application of 'sizeof' to an incomplete type 'IncompleteType4'}} -#endif - int f() { return sizeof(T); }; +struct IncompleteType4; // ms-ps-note {{forward declaration of 'IncompleteType4'}} +template struct BaseClassTemplateOfExportedClass { // ms-ps-note {{in instantiation of member function 'BaseClassTemplateOfExportedClass::f' requested here}} + int f() { return sizeof(T); }; // ms-ps-error {{invalid application of 'sizeof' to an incomplete type 'IncompleteType4'}} }; struct __declspec(dllexport) ExportedBaseClass : public BaseClassTemplateOfExportedClass {}; @@ -414,17 +395,11 @@ struct __declspec(dllexport) ExportedBaseClass2 : public ExportedBaseClassTempla // Warn about explicit instantiation declarations of dllexport classes. template struct ExplicitInstantiationDeclTemplate {}; -#if defined(MS) || defined (WI) || defined(PS) -// expected-warning@+2{{explicit instantiation declaration should not be 'dllexport'}} expected-note@+2{{attribute is here}} -#endif -extern template struct __declspec(dllexport) ExplicitInstantiationDeclTemplate; +extern template struct __declspec(dllexport) ExplicitInstantiationDeclTemplate; // non-gnu-warning {{explicit instantiation declaration should not be 'dllexport'}} \ + non-gnu-note {{attribute is here}} -template struct __declspec(dllexport) ExplicitInstantiationDeclExportedTemplate {}; -#if defined(MS) || defined (WI) || defined(PS) -// expected-note@-2{{attribute is here}} -// expected-warning@+2{{explicit instantiation declaration should not be 'dllexport'}} -#endif -extern template struct ExplicitInstantiationDeclExportedTemplate; +template struct __declspec(dllexport) ExplicitInstantiationDeclExportedTemplate {}; // non-gnu-note {{attribute is here}} +extern template struct ExplicitInstantiationDeclExportedTemplate; // non-gnu-warning {{explicit instantiation declaration should not be 'dllexport'}} namespace { struct InternalLinkageType {}; } struct __declspec(dllexport) PR23308 { @@ -440,35 +415,23 @@ class __declspec(dllexport) ExportedClass {}; class __declspec(dllimport) ImportedClass {}; template class ClassTemplate {}; -#if not defined(MS) && not defined(PS) -// expected-error@+2{{'ExportedClassTemplate' must have external linkage when declared 'dllexport'}} -#endif -template class __declspec(dllexport) ExportedClassTemplate {}; +template class __declspec(dllexport) ExportedClassTemplate {}; // win-gnu-error {{'ExportedClassTemplate' must have external linkage when declared 'dllexport'}} template class __declspec(dllimport) ImportedClassTemplate {}; template struct ExplicitlySpecializedTemplate { void func() {} }; -#if defined(MS) || defined(PS) -// expected-note@+2{{class template 'ExplicitlySpecializedTemplate' was explicitly specialized here}} -#endif -template <> struct ExplicitlySpecializedTemplate { void func() {} }; +template <> struct ExplicitlySpecializedTemplate { void func() {} }; // ms-ps-note {{class template 'ExplicitlySpecializedTemplate' was explicitly specialized here}} template struct ExplicitlyExportSpecializedTemplate { void func() {} }; template <> struct __declspec(dllexport) ExplicitlyExportSpecializedTemplate { void func() {} }; template struct ExplicitlyImportSpecializedTemplate { void func() {} }; template <> struct __declspec(dllimport) ExplicitlyImportSpecializedTemplate { void func() {} }; template struct ExplicitlyInstantiatedTemplate { void func() {} }; -#if defined(MS) || defined(PS) -// expected-note@+2{{class template 'ExplicitlyInstantiatedTemplate' was instantiated here}} -#endif -template struct ExplicitlyInstantiatedTemplate; +template struct ExplicitlyInstantiatedTemplate; // ms-ps-note {{class template 'ExplicitlyInstantiatedTemplate' was instantiated here}} template struct ExplicitlyExportInstantiatedTemplate { void func() {} }; template struct __declspec(dllexport) ExplicitlyExportInstantiatedTemplate; template struct ExplicitlyExportDeclaredInstantiatedTemplate { void func() {} }; extern template struct ExplicitlyExportDeclaredInstantiatedTemplate; -#if not defined(MS) && not defined (WI) && not defined(PS) -// expected-warning@+2{{'dllexport' attribute ignored on explicit instantiation definition}} -#endif -template struct __declspec(dllexport) ExplicitlyExportDeclaredInstantiatedTemplate; +template struct __declspec(dllexport) ExplicitlyExportDeclaredInstantiatedTemplate; // gnu-warning {{'dllexport' attribute ignored on explicit instantiation definition}} template struct ExplicitlyImportInstantiatedTemplate { void func() {} }; template struct __declspec(dllimport) ExplicitlyImportInstantiatedTemplate; @@ -496,11 +459,8 @@ class __declspec(dllexport) DerivedFromTemplateB : public ClassTemplate {} // The second derived class doesn't change anything, the attribute that was propagated first wins. class __declspec(dllimport) DerivedFromTemplateB2 : public ClassTemplate {}; -#if defined(MS) || defined(PS) -// expected-warning@+3{{propagating dll attribute to explicitly specialized base class template without dll attribute is not supported}} -// expected-note@+2{{attribute is here}} -#endif -struct __declspec(dllexport) DerivedFromExplicitlySpecializedTemplate : public ExplicitlySpecializedTemplate {}; +struct __declspec(dllexport) DerivedFromExplicitlySpecializedTemplate : public ExplicitlySpecializedTemplate {}; // ms-ps-warning {{propagating dll attribute to explicitly specialized base class template without dll attribute is not supported}} \ + ms-ps-note {{attribute is here}} // Base class alredy specialized with export attribute. struct __declspec(dllexport) DerivedFromExplicitlyExportSpecializedTemplate : public ExplicitlyExportSpecializedTemplate {}; @@ -508,11 +468,8 @@ struct __declspec(dllexport) DerivedFromExplicitlyExportSpecializedTemplate : pu // Base class already specialized with import attribute. struct __declspec(dllexport) DerivedFromExplicitlyImportSpecializedTemplate : public ExplicitlyImportSpecializedTemplate {}; -#if defined(MS) || defined(PS) -// expected-warning@+3{{propagating dll attribute to already instantiated base class template without dll attribute is not supported}} -// expected-note@+2{{attribute is here}} -#endif -struct __declspec(dllexport) DerivedFromExplicitlyInstantiatedTemplate : public ExplicitlyInstantiatedTemplate {}; +struct __declspec(dllexport) DerivedFromExplicitlyInstantiatedTemplate : public ExplicitlyInstantiatedTemplate {}; // ms-ps-warning {{propagating dll attribute to already instantiated base class template without dll attribute is not supported}} \ + ms-ps-note {{attribute is here}} // Base class already instantiated with export attribute. struct __declspec(dllexport) DerivedFromExplicitlyExportInstantiatedTemplate : public ExplicitlyExportInstantiatedTemplate {}; @@ -528,10 +485,7 @@ void func() { // MSVC allows deriving from exported template classes in local contexts. class LocalDerivedFromExportedClass : public ExportedClass {}; class LocalDerivedFromExportedTemplate : public ExportedClassTemplate {}; -#if not defined(MS) && not defined (PS) - // expected-note@+2{{in instantiation of template class 'ExportedClassTemplate' requested here}} -#endif - class LocalCRTP : public ExportedClassTemplate {}; + class LocalCRTP : public ExportedClassTemplate {}; // win-gnu-note {{in instantiation of template class 'ExportedClassTemplate' requested here}} } //===----------------------------------------------------------------------===// @@ -778,46 +732,40 @@ __declspec(dllexport) void MemberRedecl::staticInlineDecl() {} // expect __declspec(dllexport) int MemberRedecl::StaticField = 1; // expected-error{{redeclaration of 'MemberRedecl::StaticField' cannot add 'dllexport' attribute}} __declspec(dllexport) const int MemberRedecl::StaticConstField = 1; // expected-error{{redeclaration of 'MemberRedecl::StaticConstField' cannot add 'dllexport' attribute}} -#ifdef MS -// expected-warning@+4{{attribute declaration must precede definition}} -#else -// expected-error@+2{{redeclaration of 'MemberRedecl::ConstexprField' cannot add 'dllexport' attribute}} -#endif -__declspec(dllexport) constexpr int MemberRedecl::ConstexprField; -#ifdef MS +__declspec(dllexport) constexpr int MemberRedecl::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \ + non-ms-error {{redeclaration of 'MemberRedecl::ConstexprField' cannot add 'dllexport' attribute}} + struct __declspec(dllexport) ClassWithMultipleDefaultCtors { - ClassWithMultipleDefaultCtors(int = 40) {} // expected-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}} - ClassWithMultipleDefaultCtors(int = 30, ...) {} // expected-note{{declared here}} + ClassWithMultipleDefaultCtors(int = 40) {} // ms-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}} + ClassWithMultipleDefaultCtors(int = 30, ...) {} // ms-note{{declared here}} }; template struct ClassTemplateWithMultipleDefaultCtors { - __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 40) {} // expected-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}} - __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 30, ...) {} // expected-note{{declared here}} + __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 40) {} // ms-error{{'__declspec(dllexport)' cannot be applied to more than one default constructor}} + __declspec(dllexport) ClassTemplateWithMultipleDefaultCtors(int = 30, ...) {} // ms-note{{declared here}} }; template struct HasDefaults { - HasDefaults(int x = sizeof(T)) {} // expected-error {{invalid application of 'sizeof'}} + HasDefaults(int x = sizeof(T)) {} // ms-error {{invalid application of 'sizeof'}} }; template struct __declspec(dllexport) HasDefaults; template struct -__declspec(dllexport) // expected-note {{in instantiation of default function argument expression for 'HasDefaults' required here}} -HasDefaults; // expected-note {{in instantiation of member function 'HasDefaults::HasDefaults' requested here}} +__declspec(dllexport) // ms-note {{in instantiation of default function argument expression for 'HasDefaults' required here}} +HasDefaults; // ms-note {{in instantiation of member function 'HasDefaults::HasDefaults' requested here}} template struct HasDefaults2 { - __declspec(dllexport) // expected-note {{in instantiation of default function argument expression for 'HasDefaults2' required here}} - HasDefaults2(int x = sizeof(T)) {} // expected-error {{invalid application of 'sizeof'}} + __declspec(dllexport) // ms-note {{in instantiation of default function argument expression for 'HasDefaults2' required here}} + HasDefaults2(int x = sizeof(T)) {} // ms-error {{invalid application of 'sizeof'}} }; -template struct HasDefaults2; // expected-note {{in instantiation of member function 'HasDefaults2::HasDefaults2' requested here}} +template struct HasDefaults2; // ms-note {{in instantiation of member function 'HasDefaults2::HasDefaults2' requested here}} -template struct __declspec(dllexport) HasDefaults3 { // expected-note{{in instantiation of default function argument expression for 'HasDefaults3' required here}} - HasDefaults3(int x = sizeof(T)) {} // expected-error {{invalid application of 'sizeof'}} +template struct __declspec(dllexport) HasDefaults3 { // ms-note{{in instantiation of default function argument expression for 'HasDefaults3' required here}} + HasDefaults3(int x = sizeof(T)) {} // ms-error {{invalid application of 'sizeof'}} }; template <> HasDefaults3::HasDefaults3(int) {}; -#endif - //===----------------------------------------------------------------------===// // Class member templates //===----------------------------------------------------------------------===// @@ -887,12 +835,8 @@ template __declspec(dllexport) void MemTmplRedecl::staticInli template __declspec(dllexport) int MemTmplRedecl::StaticField = 1; // expected-error{{redeclaration of 'MemTmplRedecl::StaticField' cannot add 'dllexport' attribute}} template __declspec(dllexport) const int MemTmplRedecl::StaticConstField = 1; // expected-error{{redeclaration of 'MemTmplRedecl::StaticConstField' cannot add 'dllexport' attribute}} -#ifdef MS -// expected-warning@+4{{attribute declaration must precede definition}} -#else -// expected-error@+2{{redeclaration of 'MemTmplRedecl::ConstexprField' cannot add 'dllexport' attribute}} -#endif -template __declspec(dllexport) constexpr int MemTmplRedecl::ConstexprField; +template __declspec(dllexport) constexpr int MemTmplRedecl::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \ + non-ms-error {{redeclaration of 'MemTmplRedecl::ConstexprField' cannot add 'dllexport' attribute}} #endif // __has_feature(cxx_variable_templates) @@ -1097,20 +1041,13 @@ template __declspec(dllexport) void CTMR::staticInlineDecl template __declspec(dllexport) int CTMR::StaticField = 1; // expected-error{{redeclaration of 'CTMR::StaticField' cannot add 'dllexport' attribute}} template __declspec(dllexport) const int CTMR::StaticConstField = 1; // expected-error{{redeclaration of 'CTMR::StaticConstField' cannot add 'dllexport' attribute}} -#ifdef MS -// expected-warning@+4{{attribute declaration must precede definition}} -#else -// expected-error@+2{{redeclaration of 'CTMR::ConstexprField' cannot add 'dllexport' attribute}} -#endif -template __declspec(dllexport) constexpr int CTMR::ConstexprField; +template __declspec(dllexport) constexpr int CTMR::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \ + non-ms-error {{redeclaration of 'CTMR::ConstexprField' cannot add 'dllexport' attribute}} // MSVC exports explicit specialization of exported class template member // function, and errors on such definitions. MinGW does not treat them as // dllexport. -#if !defined(GNU) -// expected-error@+2{{attribute 'dllexport' cannot be applied to a deleted function}} -#endif -template <> void ExportClassTmplMembers::normalDecl() = delete; +template <> void ExportClassTmplMembers::normalDecl() = delete; // non-gnu-error {{attribute 'dllexport' cannot be applied to a deleted function}} //===----------------------------------------------------------------------===// @@ -1183,12 +1120,8 @@ template template __declspec(dllexport) void CTMT #if __has_feature(cxx_variable_templates) template template __declspec(dllexport) int CTMTR::StaticField = 1; // expected-error{{redeclaration of 'CTMTR::StaticField' cannot add 'dllexport' attribute}} template template __declspec(dllexport) const int CTMTR::StaticConstField = 1; // expected-error{{redeclaration of 'CTMTR::StaticConstField' cannot add 'dllexport' attribute}} -#ifdef MS -// expected-warning@+4{{attribute declaration must precede definition}} -#else -// expected-error@+2{{redeclaration of 'CTMTR::ConstexprField' cannot add 'dllexport' attribute}} -#endif -template template __declspec(dllexport) constexpr int CTMTR::ConstexprField; +template template __declspec(dllexport) constexpr int CTMTR::ConstexprField; // ms-warning {{attribute declaration must precede definition}} \ + non-ms-error {{redeclaration of 'CTMTR::ConstexprField' cannot add 'dllexport' attribute}} #endif // __has_feature(cxx_variable_templates) // FIXME: Precedence rules seem to be different for classes. @@ -1197,7 +1130,4 @@ template template __declspec(dllexport) constexpr int CT // Lambdas //===----------------------------------------------------------------------===// // The MS ABI doesn't provide a stable mangling for lambdas, so they can't be imported or exported. -#if defined(MS) || defined (WI) || defined(PS) -// expected-error@+2{{lambda cannot be declared 'dllexport'}} -#endif -auto Lambda = []() __declspec(dllexport) -> bool { return true; }; +auto Lambda = []() __declspec(dllexport) -> bool { return true; }; // non-gnu-error {{lambda cannot be declared 'dllexport'}} From b675c0c498e45057537194c564ad3d3f18f229dd Mon Sep 17 00:00:00 2001 From: Mikhail Gudim Date: Wed, 5 Nov 2025 08:05:43 -0500 Subject: [PATCH 13/39] [RISCV] Add a test for multiple save locations of a callee-saved register (#164479) Technically, it is possible that the a callee-saved register is saved in different locations. CFIInstrInserter should handle this, but currently it does not. --- .../CodeGen/RISCV/cfi-multiple-locations.mir | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir diff --git a/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir b/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir new file mode 100644 index 0000000000000..7844589e3f93c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/cfi-multiple-locations.mir @@ -0,0 +1,35 @@ +# RUN: llc %s -mtriple=riscv64 \ +# RUN: -run-pass=cfi-instr-inserter \ +# RUN: -riscv-enable-cfi-instr-inserter=true +# XFAIL: * + +# Technically, it is possible that a callee-saved register is saved in multiple different locations. +# CFIInstrInserter should handle this, but currently it does not. +--- +name: multiple_locations +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x9, $x2 + BEQ $x10, $x0, %bb.3 + PseudoBR %bb.2 + + bb.1: + liveins: $x10, $x9, $x2 + $x5 = COPY $x9 + CFI_INSTRUCTION register $x9, $x5 + $x9 = COPY $x5 + CFI_INSTRUCTION register $x9, $x9 + PseudoBR %bb.3 + + bb.2: + liveins: $x10, $x9, $x2 + SD $x9, $x2, 0 :: (store (s64)) + CFI_INSTRUCTION offset $x9, 0 + $x9 = LD $x2, 0 :: (load (s64)) + CFI_INSTRUCTION register $x9, $x9 + PseudoBR %bb.3 + + bb.3: + PseudoRET +... From 1de55c9693f3cff7b2a1d7b619d8f2936870a6b3 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 5 Nov 2025 13:06:24 +0000 Subject: [PATCH 14/39] [VPlan] Avoid sinking allocas in sinkScalarOperands (#166135) Use cannotHoistOrSinkRecipe to forbid sinking allocas. --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 3 +-- .../LoopVectorize/tail-folding-alloca-in-loop.ll | 11 +++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 2588c878d8472..9e65399e75dc7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -167,8 +167,7 @@ static bool sinkScalarOperands(VPlan &Plan) { if (!isa(Candidate)) return; - if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() || - Candidate->mayReadOrWriteMemory()) + if (Candidate->getParent() == SinkTo || cannotHoistOrSinkRecipe(*Candidate)) return; if (auto *RepR = dyn_cast(Candidate)) diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll index a852b731ea13b..9e523be618b44 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll @@ -12,12 +12,15 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56) +; CHECK-NEXT: [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP18]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = alloca i8, i64 [[N]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: @@ -26,7 +29,7 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK: [[PRED_STORE_IF1]]: ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = alloca i8, i64 [[N]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 ; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: @@ -35,7 +38,7 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK: [[PRED_STORE_IF3]]: ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = alloca i8, i64 [[N]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 ; CHECK-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]] ; CHECK: [[PRED_STORE_CONTINUE4]]: @@ -44,7 +47,7 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK: [[PRED_STORE_IF5]]: ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = alloca i8, i64 [[N]], align 16 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0 ; CHECK-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_CONTINUE6]]: From 305cf623d7ab6fc6340930c1c21fe9f7d39d0e8f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Wed, 5 Nov 2025 14:17:05 +0100 Subject: [PATCH 15/39] [Polly] Check for ISL errors after schedule optimization (#166551) When ISL encounters an internal error, it sets the error flag, but it is not isl_error_quota that was already checked. Check for general errors and abort the schedule optimization if that happens, instead of continuing on the good path. The error occured when compiling llvm-test-suite's MultiSource/Applications/JM/lencod/leaky_bucket.c with Polly enabled. Not adding a test case because it depends on ISL internals. We do not want to a test case to depend on which version of ISL is used. --- polly/lib/Transform/ScheduleOptimizer.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index 0888ebd7a9362..cb08397c201f2 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -927,9 +927,24 @@ static void runIslScheduleOptimizer( walkScheduleTreeForStatistics(Schedule, 2); } + // Check for why any computation could have failed if (MaxOpGuard.hasQuotaExceeded()) { POLLY_DEBUG(dbgs() << "Schedule optimizer calculation exceeds ISL quota\n"); return; + } else if (isl_ctx_last_error(Ctx) != isl_error_none) { + const char *File = isl_ctx_last_error_file(Ctx); + int Line = isl_ctx_last_error_line(Ctx); + const char *Msg = isl_ctx_last_error_msg(Ctx); + POLLY_DEBUG( + dbgs() + << "ISL reported an error during the computation of a new schedule at " + << File << ":" << Line << ": " << Msg); + isl_ctx_reset_error(Ctx); + return; + } else if (Schedule.is_null()) { + POLLY_DEBUG(dbgs() << "Schedule optimizer did not compute a new schedule " + "for unknown reasons\n"); + return; } // Skip profitability check if user transformation(s) have been applied. From ba1dbdd44a1db9a5a0912830afc05d7b18cbf666 Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Wed, 5 Nov 2025 14:30:27 +0100 Subject: [PATCH 16/39] Revert "[utils][UpdateLLCTestChecks] Add MIR support to update_llc_test_checks.py." (#166549) Reverts llvm/llvm-project#164965 --- .../Inputs/x86_asm_mir_mixed.ll | 17 ---- .../Inputs/x86_asm_mir_mixed.ll.expected | 45 --------- .../Inputs/x86_asm_mir_same_prefix.ll | 13 --- .../x86_asm_mir_same_prefix.ll.expected | 16 ---- .../x86-asm-mir-mixed.test | 9 -- .../x86-asm-mir-same-prefix.test | 7 -- llvm/utils/UpdateTestChecks/common.py | 1 - llvm/utils/UpdateTestChecks/mir.py | 11 +-- llvm/utils/update_llc_test_checks.py | 94 ++++--------------- 9 files changed, 19 insertions(+), 194 deletions(-) delete mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll delete mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected delete mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll delete mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected delete mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test delete mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll deleted file mode 100644 index 292637177591f..0000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=ASM -; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=MIR - -define i64 @test1(i64 %i) nounwind readnone { - %loc = alloca i64 - %j = load i64, ptr %loc - %r = add i64 %i, %j - ret i64 %r -} - -define i64 @test2(i32 %i) nounwind readnone { - %loc = alloca i32 - %j = load i32, ptr %loc - %r = add i32 %i, %j - %ext = zext i32 %r to i64 - ret i64 %ext -} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected deleted file mode 100644 index 88cb03e85204a..0000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected +++ /dev/null @@ -1,45 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=ASM -; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=MIR - -define i64 @test1(i64 %i) nounwind readnone { -; ASM-LABEL: test1: -; ASM: # %bb.0: -; ASM-NEXT: movq %rdi, %rax -; ASM-NEXT: addq -{{[0-9]+}}(%rsp), %rax -; ASM-NEXT: retq -; MIR-LABEL: name: test1 -; MIR: bb.0 (%ir-block.0): -; MIR-NEXT: liveins: $rdi -; MIR-NEXT: {{ $}} -; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi -; MIR-NEXT: [[ADD64rm:%[0-9]+]]:gr64 = ADD64rm [[COPY]], %stack.0.loc, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (dereferenceable load (s64) from %ir.loc) -; MIR-NEXT: $rax = COPY [[ADD64rm]] -; MIR-NEXT: RET 0, $rax - %loc = alloca i64 - %j = load i64, ptr %loc - %r = add i64 %i, %j - ret i64 %r -} - -define i64 @test2(i32 %i) nounwind readnone { -; ASM-LABEL: test2: -; ASM: # %bb.0: -; ASM-NEXT: movl %edi, %eax -; ASM-NEXT: addl -{{[0-9]+}}(%rsp), %eax -; ASM-NEXT: retq -; MIR-LABEL: name: test2 -; MIR: bb.0 (%ir-block.0): -; MIR-NEXT: liveins: $edi -; MIR-NEXT: {{ $}} -; MIR-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi -; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[COPY]], %stack.0.loc, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (dereferenceable load (s32) from %ir.loc) -; MIR-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[ADD32rm]], %subreg.sub_32bit -; MIR-NEXT: $rax = COPY [[SUBREG_TO_REG]] -; MIR-NEXT: RET 0, $rax - %loc = alloca i32 - %j = load i32, ptr %loc - %r = add i32 %i, %j - %ext = zext i32 %r to i64 - ret i64 %ext -} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll deleted file mode 100644 index 7167bcf258e68..0000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=CHECK -; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK - -define i32 @add(i32 %a, i32 %b) { - %sum = add i32 %a, %b - ret i32 %sum -} - -define i32 @sub(i32 %a, i32 %b) { - %diff = sub i32 %a, %b - ret i32 %diff -} - diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected deleted file mode 100644 index 1ba920d1de8b0..0000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected +++ /dev/null @@ -1,16 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=CHECK -; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK - -define i32 @add(i32 %a, i32 %b) { - %sum = add i32 %a, %b - ret i32 %sum -} - -define i32 @sub(i32 %a, i32 %b) { - %diff = sub i32 %a, %b - ret i32 %diff -} - -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test deleted file mode 100644 index 6fc57b583b37d..0000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test +++ /dev/null @@ -1,9 +0,0 @@ -# REQUIRES: x86-registered-target -## Test checking that update_llc_test_checks.py can generate both ASM and MIR checks in the same file - -# RUN: cp -f %S/Inputs/x86_asm_mir_mixed.ll %t.ll && %update_llc_test_checks %t.ll -# RUN: diff -u %S/Inputs/x86_asm_mir_mixed.ll.expected %t.ll - -## Verify that running the script again on an already updated file doesn't add duplicate checks -# RUN: %update_llc_test_checks %t.ll -# RUN: diff -u %S/Inputs/x86_asm_mir_mixed.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test deleted file mode 100644 index bb91a44678f1a..0000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test +++ /dev/null @@ -1,7 +0,0 @@ -## Test that using the same prefix for both ASM and MIR outputs generates a warning -## and doesn't produce any checks. - -# RUN: cp -f %S/Inputs/x86_asm_mir_same_prefix.ll %t.ll && %update_llc_test_checks %t.ll 2>&1 | FileCheck %s --check-prefix=WARNING -# RUN: diff -u %S/Inputs/x86_asm_mir_same_prefix.ll.expected %t.ll - -# WARNING: WARNING: The following prefixes are used for both ASM and MIR output, which will cause FileCheck failures: CHECK diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index baa0377cd8b81..2dad16a8eebb7 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -605,7 +605,6 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)") MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)") DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)") -STOP_PASS_RE = re.compile(r"-stop-(before|after)=(\w+)") IS_DEBUG_RECORD_RE = re.compile(r"^(\s+)#dbg_") IS_SWITCH_CASE_RE = re.compile(r"^\s+i\d+ \d+, label %\S+") diff --git a/llvm/utils/UpdateTestChecks/mir.py b/llvm/utils/UpdateTestChecks/mir.py index 01ee0e19f7cb9..24bb8b341d335 100644 --- a/llvm/utils/UpdateTestChecks/mir.py +++ b/llvm/utils/UpdateTestChecks/mir.py @@ -163,15 +163,13 @@ def add_mir_checks_for_function( print_fixed_stack, first_check_is_next, at_the_function_name, - check_indent=None, ): printed_prefixes = set() for run in run_list: for prefix in run[0]: if prefix in printed_prefixes: break - # func_info can be empty if there was a prefix conflict. - if not func_dict[prefix].get(func_name): + if not func_dict[prefix][func_name]: continue if printed_prefixes: # Add some space between different check prefixes. @@ -187,7 +185,6 @@ def add_mir_checks_for_function( func_dict[prefix][func_name], print_fixed_stack, first_check_is_next, - check_indent, ) break else: @@ -207,7 +204,6 @@ def add_mir_check_lines( func_info, print_fixed_stack, first_check_is_next, - check_indent=None, ): func_body = str(func_info).splitlines() if single_bb: @@ -224,10 +220,7 @@ def add_mir_check_lines( first_line = func_body[0] indent = len(first_line) - len(first_line.lstrip(" ")) # A check comment, indented the appropriate amount - if check_indent is not None: - check = "{}; {}".format(check_indent, prefix) - else: - check = "{:>{}}; {}".format("", indent, prefix) + check = "{:>{}}; {}".format("", indent, prefix) output_lines.append("{}-LABEL: name: {}".format(check, func_name)) diff --git a/llvm/utils/update_llc_test_checks.py b/llvm/utils/update_llc_test_checks.py index 98864be62875b..8c57e75f34f75 100755 --- a/llvm/utils/update_llc_test_checks.py +++ b/llvm/utils/update_llc_test_checks.py @@ -15,7 +15,7 @@ import os # Used to advertise this file's name ("autogenerated_note"). import sys -from UpdateTestChecks import common, mir +from UpdateTestChecks import common # llc is the only llc-like in the LLVM tree but downstream forks can add # additional ones here if they have them. @@ -33,7 +33,6 @@ def update_test(ti: common.TestInfo): break run_list = [] - mir_run_list = [] for l in ti.run_lines: if "|" not in l: common.warn("Skipping unparsable RUN line: " + l) @@ -58,14 +57,9 @@ def update_test(ti: common.TestInfo): if m: march_in_cmd = m.groups()[0] - target_list = run_list m = common.DEBUG_ONLY_ARG_RE.search(llc_cmd) if m and m.groups()[0] == "isel": from UpdateTestChecks import isel as output_type - elif not m and common.STOP_PASS_RE.search(llc_cmd): - # MIR output mode. If -debug-only is present assume - # the debug output is the main point of interest. - target_list = mir_run_list else: from UpdateTestChecks import asm as output_type @@ -90,7 +84,7 @@ def update_test(ti: common.TestInfo): # FIXME: We should use multiple check prefixes to common check lines. For # now, we just ignore all but the last. - target_list.append( + run_list.append( ( check_prefixes, llc_tool, @@ -125,20 +119,14 @@ def update_test(ti: common.TestInfo): ginfo=ginfo, ) - # Dictionary to store MIR function bodies separately - mir_func_dict = {} - for run_tuple, is_mir in [(run, False) for run in run_list] + [ - (run, True) for run in mir_run_list - ]: - ( - prefixes, - llc_tool, - llc_args, - preprocess_cmd, - triple_in_cmd, - march_in_cmd, - ) = run_tuple - + for ( + prefixes, + llc_tool, + llc_args, + preprocess_cmd, + triple_in_cmd, + march_in_cmd, + ) in run_list: common.debug("Extracted LLC cmd:", llc_tool, llc_args) common.debug("Extracted FileCheck prefixes:", str(prefixes)) @@ -153,54 +141,22 @@ def update_test(ti: common.TestInfo): if not triple: triple = common.get_triple_from_march(march_in_cmd) - if is_mir: - # MIR output mode - common.debug("Detected MIR output mode for prefixes:", str(prefixes)) - for prefix in prefixes: - if prefix not in mir_func_dict: - mir_func_dict[prefix] = {} - - mir.build_function_info_dictionary( - ti.path, - raw_tool_output, - triple, - prefixes, - mir_func_dict, - ti.args.verbose, + scrubber, function_re = output_type.get_run_handler(triple) + if 0 == builder.process_run_line( + function_re, scrubber, raw_tool_output, prefixes + ): + common.warn( + "Couldn't match any function. Possibly the wrong target triple has been provided" ) - else: - # ASM output mode - scrubber, function_re = output_type.get_run_handler(triple) - if 0 == builder.process_run_line( - function_re, scrubber, raw_tool_output, prefixes - ): - common.warn( - "Couldn't match any function. Possibly the wrong target triple has been provided" - ) - builder.processed_prefixes(prefixes) + builder.processed_prefixes(prefixes) func_dict = builder.finish_and_get_func_dict() - - # Check for conflicts: same prefix used for both ASM and MIR - conflicting_prefixes = set(func_dict.keys()) & set(mir_func_dict.keys()) - if conflicting_prefixes: - common.warn( - "The following prefixes are used for both ASM and MIR output, which will cause FileCheck failures: {}".format( - ", ".join(sorted(conflicting_prefixes)) - ), - test_file=ti.path, - ) - for prefix in conflicting_prefixes: - mir_func_dict[prefix] = {} - func_dict[prefix] = {} - global_vars_seen_dict = {} is_in_function = False is_in_function_start = False func_name = None prefix_set = set([prefix for p in run_list for prefix in p[0]]) - prefix_set.update([prefix for p in mir_run_list for prefix in p[0]]) common.debug("Rewriting FileCheck prefixes:", str(prefix_set)) output_lines = [] @@ -265,22 +221,6 @@ def update_test(ti: common.TestInfo): is_filtered=builder.is_filtered(), ) ) - - # Also add MIR checks if we have them for this function - if mir_run_list and func_name: - mir.add_mir_checks_for_function( - ti.path, - output_lines, - mir_run_list, - mir_func_dict, - func_name, - single_bb=False, # Don't skip basic block labels. - print_fixed_stack=False, # Don't print fixed stack (ASM tests don't need it). - first_check_is_next=False, # First check is LABEL, not NEXT. - at_the_function_name=False, # Use "name:" not "@name". - check_indent="", # No indentation for IR files (not MIR files). - ) - is_in_function_start = False if is_in_function: From 438a18c1e105ca04e624239644195e48b28b5099 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 5 Nov 2025 13:50:32 +0000 Subject: [PATCH 17/39] [X86] Add test coverage for #166534 (#166552) --- llvm/test/CodeGen/X86/pr166534.ll | 124 ++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 llvm/test/CodeGen/X86/pr166534.ll diff --git a/llvm/test/CodeGen/X86/pr166534.ll b/llvm/test/CodeGen/X86/pr166534.ll new file mode 100644 index 0000000000000..aef44cc3e40d0 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr166534.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE2 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE4 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512 + +define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) { +; SSE2-LABEL: pr166534: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movq (%rdi), %rax +; SSE2-NEXT: movq 8(%rdi), %r8 +; SSE2-NEXT: movdqu (%rdi), %xmm0 +; SSE2-NEXT: movq (%rsi), %r9 +; SSE2-NEXT: movq 8(%rsi), %rdi +; SSE2-NEXT: movdqu (%rsi), %xmm1 +; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 +; SSE2-NEXT: pmovmskb %xmm1, %esi +; SSE2-NEXT: xorl %r10d, %r10d +; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF +; SSE2-NEXT: sete %r10b +; SSE2-NEXT: orq %r10, (%rdx) +; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF +; SSE2-NEXT: jne .LBB0_2 +; SSE2-NEXT: # %bb.1: # %if.then +; SSE2-NEXT: xorq %r9, %rax +; SSE2-NEXT: xorq %rdi, %r8 +; SSE2-NEXT: xorl %edx, %edx +; SSE2-NEXT: orq %rax, %r8 +; SSE2-NEXT: sete %dl +; SSE2-NEXT: orq %rdx, (%rcx) +; SSE2-NEXT: .LBB0_2: # %if.end +; SSE2-NEXT: retq +; +; SSE4-LABEL: pr166534: +; SSE4: # %bb.0: # %entry +; SSE4-NEXT: movq (%rdi), %rax +; SSE4-NEXT: movq 8(%rdi), %r8 +; SSE4-NEXT: movdqu (%rdi), %xmm0 +; SSE4-NEXT: movq (%rsi), %r9 +; SSE4-NEXT: movq 8(%rsi), %rdi +; SSE4-NEXT: movdqu (%rsi), %xmm1 +; SSE4-NEXT: pxor %xmm0, %xmm1 +; SSE4-NEXT: xorl %esi, %esi +; SSE4-NEXT: ptest %xmm1, %xmm1 +; SSE4-NEXT: sete %sil +; SSE4-NEXT: orq %rsi, (%rdx) +; SSE4-NEXT: ptest %xmm1, %xmm1 +; SSE4-NEXT: jne .LBB0_2 +; SSE4-NEXT: # %bb.1: # %if.then +; SSE4-NEXT: xorq %r9, %rax +; SSE4-NEXT: xorq %rdi, %r8 +; SSE4-NEXT: xorl %edx, %edx +; SSE4-NEXT: orq %rax, %r8 +; SSE4-NEXT: sete %dl +; SSE4-NEXT: orq %rdx, (%rcx) +; SSE4-NEXT: .LBB0_2: # %if.end +; SSE4-NEXT: retq +; +; AVX2-LABEL: pr166534: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: movq (%rdi), %rax +; AVX2-NEXT: movq 8(%rdi), %r8 +; AVX2-NEXT: vmovdqu (%rdi), %xmm0 +; AVX2-NEXT: movq (%rsi), %rdi +; AVX2-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; AVX2-NEXT: movq 8(%rsi), %rsi +; AVX2-NEXT: xorl %r9d, %r9d +; AVX2-NEXT: vptest %xmm0, %xmm0 +; AVX2-NEXT: sete %r9b +; AVX2-NEXT: orq %r9, (%rdx) +; AVX2-NEXT: vptest %xmm0, %xmm0 +; AVX2-NEXT: jne .LBB0_2 +; AVX2-NEXT: # %bb.1: # %if.then +; AVX2-NEXT: xorq %rdi, %rax +; AVX2-NEXT: xorq %rsi, %r8 +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: orq %rax, %r8 +; AVX2-NEXT: sete %dl +; AVX2-NEXT: orq %rdx, (%rcx) +; AVX2-NEXT: .LBB0_2: # %if.end +; AVX2-NEXT: retq +; +; AVX512-LABEL: pr166534: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: movq (%rdi), %rax +; AVX512-NEXT: movq 8(%rdi), %r8 +; AVX512-NEXT: vmovdqu (%rdi), %xmm0 +; AVX512-NEXT: movq (%rsi), %r9 +; AVX512-NEXT: movq 8(%rsi), %rdi +; AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0 +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: vptest %xmm0, %xmm0 +; AVX512-NEXT: sete %sil +; AVX512-NEXT: orq %rsi, (%rdx) +; AVX512-NEXT: vptest %xmm0, %xmm0 +; AVX512-NEXT: jne .LBB0_2 +; AVX512-NEXT: # %bb.1: # %if.then +; AVX512-NEXT: xorq %r9, %rax +; AVX512-NEXT: xorq %rdi, %r8 +; AVX512-NEXT: xorl %edx, %edx +; AVX512-NEXT: orq %rax, %r8 +; AVX512-NEXT: sete %dl +; AVX512-NEXT: orq %rdx, (%rcx) +; AVX512-NEXT: .LBB0_2: # %if.end +; AVX512-NEXT: retq +entry: + %a = load i128, ptr %pa, align 8 + %b = load i128, ptr %pb, align 8 + %cmp = icmp eq i128 %a, %b + %conv1 = zext i1 %cmp to i128 + %c = load i128, ptr %pc, align 8 + %or = or i128 %c, %conv1 + store i128 %or, ptr %pc, align 8 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %d = load i128, ptr %pd, align 8 + %or7 = or i128 %d, %conv1 + store i128 %or7, ptr %pd, align 8 + br label %if.end + +if.end: + ret void +} From 5fedb7c58881d83708d9669630497dbbfbdeea54 Mon Sep 17 00:00:00 2001 From: Tomer Shafir Date: Wed, 5 Nov 2025 16:29:05 +0200 Subject: [PATCH 18/39] [Clang][ARM] Fix tests using thumb instead arm arch on cc1 (#166416) Currently, the ARM backend incorrectly parses every `arm` prefixed arch to be non-thumb, but `armv6m` is THUMB and doesnt have ARM ops causing the test to fail when compiling to assembly and not LLVM IR: `error: Function 'foo' uses ARM instructions, but the target does not support ARM mode execution.` This only happens when invoking cc1 directly and not the Clang driver. As a quick triage, this patch changes the tests to use `thumb`. Uncovered by https://github.com/llvm/llvm-project/pull/151404 --- clang/test/AST/ast-dump-arm-attr.c | 6 +++--- clang/test/CodeGen/arm-acle-coproc.c | 4 ++-- clang/test/CodeGen/pr45476.cpp | 2 +- clang/test/Sema/builtins-arm-exclusive-124.c | 4 ++-- clang/test/Sema/builtins-arm-exclusive-none.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/clang/test/AST/ast-dump-arm-attr.c b/clang/test/AST/ast-dump-arm-attr.c index 78f557d4eb0b1..d26a77d067e97 100644 --- a/clang/test/AST/ast-dump-arm-attr.c +++ b/clang/test/AST/ast-dump-arm-attr.c @@ -2,7 +2,7 @@ // RUN: %clang_cc1 -triple arm-apple-darwin -ast-dump -ast-dump-filter Test %s \ // RUN: | FileCheck --strict-whitespace %s // -// RUN: %clang_cc1 -triple armv8m.base-none-eabi -mcmse -ast-dump -ast-dump-filter Test %s \ +// RUN: %clang_cc1 -triple thumbv8m.base-none-eabi -mcmse -ast-dump -ast-dump-filter Test %s \ // RUN: | FileCheck --strict-whitespace %s --check-prefix=CHECK-CMSE // // Tests with serialization: @@ -11,8 +11,8 @@ // RUN: | sed -e "s/ //" -e "s/ imported//" \ // RUN: | FileCheck --strict-whitespace %s // -// RUN: %clang_cc1 -triple armv8m.base-none-eabi -mcmse -emit-pch -o %t %s -// RUN: %clang_cc1 -x c -triple armv8m.base-none-eabi -mcmse -include-pch %t -ast-dump-all -ast-dump-filter Test /dev/null \ +// RUN: %clang_cc1 -triple thumbv8m.base-none-eabi -mcmse -emit-pch -o %t %s +// RUN: %clang_cc1 -x c -triple thumbv8m.base-none-eabi -mcmse -include-pch %t -ast-dump-all -ast-dump-filter Test /dev/null \ // RUN: | sed -e "s/ //" -e "s/ imported//" \ // RUN: | FileCheck --strict-whitespace %s diff --git a/clang/test/CodeGen/arm-acle-coproc.c b/clang/test/CodeGen/arm-acle-coproc.c index 5acb9f65413a0..000fff632f0b7 100644 --- a/clang/test/CodeGen/arm-acle-coproc.c +++ b/clang/test/CodeGen/arm-acle-coproc.c @@ -4,10 +4,10 @@ // RUN: %clang_cc1 -triple armv5te %s -E -dD -o - | FileCheck --check-prefix=CHECK-V5-TE %s // RUN: %clang_cc1 -triple armv5tej %s -E -dD -o - | FileCheck --check-prefix=CHECK-V5-TE %s // RUN: %clang_cc1 -triple armv6 %s -E -dD -o - | FileCheck --check-prefix=CHECK-V6 %s -// RUN: %clang_cc1 -triple armv6m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V6M %s +// RUN: %clang_cc1 -triple thumbv6m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V6M %s // RUN: %clang_cc1 -triple armv7a %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s // RUN: %clang_cc1 -triple armv7r %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s -// RUN: %clang_cc1 -triple armv7m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s +// RUN: %clang_cc1 -triple thumbv7m %s -E -dD -o - | FileCheck --check-prefix=CHECK-V7 %s // RUN: %clang_cc1 -triple armv8a %s -E -dD -o - | FileCheck --check-prefix=CHECK-V8 %s // RUN: %clang_cc1 -triple armv8r %s -E -dD -o - | FileCheck --check-prefix=CHECK-V8 %s // RUN: %clang_cc1 -triple armv8.1a %s -E -dD -o - | FileCheck --check-prefix=CHECK-V8 %s diff --git a/clang/test/CodeGen/pr45476.cpp b/clang/test/CodeGen/pr45476.cpp index c95f7fb8cd9c3..3a67904a8e568 100644 --- a/clang/test/CodeGen/pr45476.cpp +++ b/clang/test/CodeGen/pr45476.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple armv6m-eabi -emit-llvm %s -o - | FileCheck -check-prefix=LIBCALL %s +// RUN: %clang_cc1 -triple thumbv6m-eabi -emit-llvm %s -o - | FileCheck -check-prefix=LIBCALL %s // RUN: %clang_cc1 -triple armv8-eabi -emit-llvm %s -o - | FileCheck -check-prefix=NATIVE %s // PR45476 diff --git a/clang/test/Sema/builtins-arm-exclusive-124.c b/clang/test/Sema/builtins-arm-exclusive-124.c index b35ac181f0887..93540879a01ba 100644 --- a/clang/test/Sema/builtins-arm-exclusive-124.c +++ b/clang/test/Sema/builtins-arm-exclusive-124.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple armv7m -fsyntax-only -verify %s -// RUN: %clang_cc1 -triple armv8m.main -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple thumbv7m -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple thumbv8m.main -fsyntax-only -verify %s // RUN: %clang_cc1 -triple armv8.1m.main -fsyntax-only -verify %s // All these architecture versions provide 1-, 2- or 4-byte exclusive accesses, diff --git a/clang/test/Sema/builtins-arm-exclusive-none.c b/clang/test/Sema/builtins-arm-exclusive-none.c index 2ef910dd99aaf..25a71e18935a6 100644 --- a/clang/test/Sema/builtins-arm-exclusive-none.c +++ b/clang/test/Sema/builtins-arm-exclusive-none.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple armv6m -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple thumbv6m -fsyntax-only -verify %s // Armv6-M does not support exclusive loads/stores at all, so all uses of // __builtin_arm_ldrex[d] and __builtin_arm_strex[d] is forbidden. From dd14eb8242d756f7ac03d003effab289d8950adf Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 5 Nov 2025 14:31:17 +0000 Subject: [PATCH 19/39] [RISCV] Introduce pass to promote double constants to a global array (#160536) As discussed in #153402, we have inefficiences in handling constant pool access that are difficult to address. Using an IR pass to promote double constants to a global allows a higher degree of control of code generation for these accesses, resulting in improved performance on benchmarks that might otherwise have high register pressure due to accessing constant pool values separately rather than via a common base. Directly promoting double constants to separate global values and relying on the global merger to do a sensible thing would be one potential avenue to explore, but it is _not_ done in this version of the patch because: * The global merger pass needs fixes. For instance it claims to be a function pass, yet all of the work is done in initialisation. This means that attempts by backends to schedule it after a given module pass don't actually work as expected. * The heuristics used can impact codegen unexpectedly, so I worry that tweaking it to get the behaviour desired for promoted constants may lead to other issues. This may be completely tractable though. Now that #159352 has landed, the impact on terms if dynamically executed instructions is slightly smaller (as we are starting from a better baseline), but still worthwhile in lbm and nab from SPEC. Results below are for rva22u64: ``` Benchmark Baseline This PR Diff (%) ============================================================ ============================================================ 500.perlbench_r 180668945687 180666122417 -0.00% 502.gcc_r 221274522161 221277565086 0.00% 505.mcf_r 134656204033 134656204066 0.00% 508.namd_r 217646645332 216699783858 -0.44% 510.parest_r 291731988950 291916190776 0.06% 511.povray_r 30983594866 31107718817 0.40% 519.lbm_r 91217999812 87405361395 -4.18% 520.omnetpp_r 137699867177 137674535853 -0.02% 523.xalancbmk_r 284730719514 284734023366 0.00% 525.x264_r 379107521547 379100250568 -0.00% 526.blender_r 659391437610 659447919505 0.01% 531.deepsjeng_r 350038121654 350038121656 0.00% 538.imagick_r 238568674979 238560772162 -0.00% 541.leela_r 405660852855 405654701346 -0.00% 544.nab_r 398215801848 391352111262 -1.72% 557.xz_r 129832192047 129832192055 0.00% ``` --- Notes for reviewers: * As discussed at the sync-up meeting, the suggestion is to try to land an incremental improvement to the status quo even if there is more work to be done around the general issue of constant pool handling. We can discuss here if that is actually the best next step or not, but I just wanted to clarify that's why this is being posted with a somewhat narrow scope. * I've disabled transformations both for RV32 and on systems without D as both cases saw some regressions. --- llvm/lib/Target/RISCV/CMakeLists.txt | 1 + llvm/lib/Target/RISCV/RISCV.h | 4 + .../lib/Target/RISCV/RISCVPromoteConstant.cpp | 213 ++++++++++++++++++ llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 3 + llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 + .../CodeGen/RISCV/riscv-promote-constant.ll | 148 ++++++++++++ 6 files changed, 370 insertions(+) create mode 100644 llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp create mode 100644 llvm/test/CodeGen/RISCV/riscv-promote-constant.ll diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 0ff178e1f1959..e9088a4d9275c 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -58,6 +58,7 @@ add_llvm_target(RISCVCodeGen RISCVMoveMerger.cpp RISCVOptWInstrs.cpp RISCVPostRAExpandPseudoInsts.cpp + RISCVPromoteConstant.cpp RISCVPushPopOptimizer.cpp RISCVRedundantCopyElimination.cpp RISCVRegisterInfo.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index ae9410193efe1..51e8e8574ed15 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -20,6 +20,7 @@ namespace llvm { class FunctionPass; class InstructionSelector; +class ModulePass; class PassRegistry; class RISCVRegisterBankInfo; class RISCVSubtarget; @@ -111,6 +112,9 @@ void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &); FunctionPass *createRISCVPreLegalizerCombiner(); void initializeRISCVPreLegalizerCombinerPass(PassRegistry &); +ModulePass *createRISCVPromoteConstantPass(); +void initializeRISCVPromoteConstantPass(PassRegistry &); + FunctionPass *createRISCVVLOptimizerPass(); void initializeRISCVVLOptimizerPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp b/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp new file mode 100644 index 0000000000000..bf1f69f8e8d93 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp @@ -0,0 +1,213 @@ +//==- RISCVPromoteConstant.cpp - Promote constant fp to global for RISC-V --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-promote-const" +#define RISCV_PROMOTE_CONSTANT_NAME "RISC-V Promote Constants" + +STATISTIC(NumPromoted, "Number of constant literals promoted to globals"); +STATISTIC(NumPromotedUses, "Number of uses of promoted literal constants"); + +namespace { + +class RISCVPromoteConstant : public ModulePass { +public: + static char ID; + RISCVPromoteConstant() : ModulePass(ID) {} + + StringRef getPassName() const override { return RISCV_PROMOTE_CONSTANT_NAME; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesCFG(); + } + + /// Iterate over the functions and promote the double fp constants that + /// would otherwise go into the constant pool to a constant array. + bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + // TargetMachine and Subtarget are needed to query isFPImmlegal. + const TargetPassConfig &TPC = getAnalysis(); + const TargetMachine &TM = TPC.getTM(); + bool Changed = false; + for (Function &F : M) { + const RISCVSubtarget &ST = TM.getSubtarget(F); + const RISCVTargetLowering *TLI = ST.getTargetLowering(); + Changed |= runOnFunction(F, TLI); + } + return Changed; + } + +private: + bool runOnFunction(Function &F, const RISCVTargetLowering *TLI); +}; +} // end anonymous namespace + +char RISCVPromoteConstant::ID = 0; + +INITIALIZE_PASS(RISCVPromoteConstant, DEBUG_TYPE, RISCV_PROMOTE_CONSTANT_NAME, + false, false) + +ModulePass *llvm::createRISCVPromoteConstantPass() { + return new RISCVPromoteConstant(); +} + +bool RISCVPromoteConstant::runOnFunction(Function &F, + const RISCVTargetLowering *TLI) { + if (F.hasOptNone() || F.hasOptSize()) + return false; + + // Bail out and make no transformation if the target doesn't support + // doubles, or if we're not targeting RV64 as we currently see some + // regressions for those targets. + if (!TLI->isTypeLegal(MVT::f64) || !TLI->isTypeLegal(MVT::i64)) + return false; + + // Collect all unique double constants and their uses in the function. Use + // MapVector to preserve insertion order. + MapVector> ConstUsesMap; + + for (Instruction &I : instructions(F)) { + for (Use &U : I.operands()) { + auto *C = dyn_cast(U.get()); + if (!C || !C->getType()->isDoubleTy()) + continue; + // Do not promote if it wouldn't be loaded from the constant pool. + if (TLI->isFPImmLegal(C->getValueAPF(), MVT::f64, + /*ForCodeSize=*/false)) + continue; + // Do not promote a constant if it is used as an immediate argument + // for an intrinsic. + if (auto *II = dyn_cast(U.getUser())) { + Function *IntrinsicFunc = II->getFunction(); + unsigned OperandIdx = U.getOperandNo(); + if (IntrinsicFunc && IntrinsicFunc->getAttributes().hasParamAttr( + OperandIdx, Attribute::ImmArg)) { + LLVM_DEBUG(dbgs() << "Skipping promotion of constant in: " << *II + << " because operand " << OperandIdx + << " must be an immediate.\n"); + continue; + } + } + // Note: FP args to inline asm would be problematic if we had a + // constraint that required an immediate floating point operand. At the + // time of writing LLVM doesn't recognise such a constraint. + ConstUsesMap[C].push_back(&U); + } + } + + int PromotableConstants = ConstUsesMap.size(); + LLVM_DEBUG(dbgs() << "Found " << PromotableConstants + << " promotable constants in " << F.getName() << "\n"); + // Bail out if no promotable constants found, or if only one is found. + if (PromotableConstants < 2) { + LLVM_DEBUG(dbgs() << "Performing no promotions as insufficient promotable " + "constants found\n"); + return false; + } + + NumPromoted += PromotableConstants; + + // Create a global array containing the promoted constants. + Module *M = F.getParent(); + Type *DoubleTy = Type::getDoubleTy(M->getContext()); + + SmallVector ConstantVector; + for (auto const &Pair : ConstUsesMap) + ConstantVector.push_back(Pair.first); + + ArrayType *ArrayTy = ArrayType::get(DoubleTy, ConstantVector.size()); + Constant *GlobalArrayInitializer = + ConstantArray::get(ArrayTy, ConstantVector); + + auto *GlobalArray = new GlobalVariable( + *M, ArrayTy, + /*isConstant=*/true, GlobalValue::InternalLinkage, GlobalArrayInitializer, + ".promoted_doubles." + F.getName()); + + // A cache to hold the loaded value for a given constant within a basic block. + DenseMap, Value *> LocalLoads; + + // Replace all uses with the loaded value. + unsigned Idx = 0; + for (auto const &Pair : ConstUsesMap) { + ConstantFP *Const = Pair.first; + const SmallVector &Uses = Pair.second; + + for (Use *U : Uses) { + Instruction *UserInst = cast(U->getUser()); + BasicBlock *InsertionBB; + + // If the user is a PHI node, we must insert the load in the + // corresponding predecessor basic block. Otherwise, it's inserted into + // the same block as the use. + if (auto *PN = dyn_cast(UserInst)) + InsertionBB = PN->getIncomingBlock(*U); + else + InsertionBB = UserInst->getParent(); + + if (isa(InsertionBB->getTerminator())) { + LLVM_DEBUG(dbgs() << "Bailing out: catchswitch means thre is no valid " + "insertion point.\n"); + return false; + } + + auto CacheKey = std::make_pair(Const, InsertionBB); + Value *LoadedVal = nullptr; + + // Re-use a load if it exists in the insertion block. + if (LocalLoads.count(CacheKey)) { + LoadedVal = LocalLoads.at(CacheKey); + } else { + // Otherwise, create a new GEP and Load at the correct insertion point. + // It is always safe to insert in the first insertion point in the BB, + // so do that and let other passes reorder. + IRBuilder<> Builder(InsertionBB, InsertionBB->getFirstInsertionPt()); + Value *ElementPtr = Builder.CreateConstInBoundsGEP2_64( + GlobalArray->getValueType(), GlobalArray, 0, Idx, "double.addr"); + LoadedVal = Builder.CreateLoad(DoubleTy, ElementPtr, "double.val"); + + // Cache the newly created load for this block. + LocalLoads[CacheKey] = LoadedVal; + } + + U->set(LoadedVal); + ++NumPromotedUses; + } + ++Idx; + } + + return true; +} diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index ae54ff1515121..16ef67da83128 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -139,6 +139,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVExpandAtomicPseudoPass(*PR); initializeRISCVRedundantCopyEliminationPass(*PR); initializeRISCVAsmPrinterPass(*PR); + initializeRISCVPromoteConstantPass(*PR); } static Reloc::Model getEffectiveRelocModel(std::optional RM) { @@ -462,6 +463,8 @@ void RISCVPassConfig::addIRPasses() { } bool RISCVPassConfig::addPreISel() { + if (TM->getOptLevel() != CodeGenOptLevel::None) + addPass(createRISCVPromoteConstantPass()); if (TM->getOptLevel() != CodeGenOptLevel::None) { // Add a barrier before instruction selection so that we will not get // deleted block address after enabling default outlining. See D99707 for diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index ea08061221fd4..769823d1c4216 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -75,6 +75,7 @@ ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Exception handling preparation +; CHECK-NEXT: RISC-V Promote Constants ; CHECK-NEXT: A No-Op Barrier Pass ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Merge internal globals diff --git a/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll b/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll new file mode 100644 index 0000000000000..2bde6013b3640 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/riscv-promote-constant.ll @@ -0,0 +1,148 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt %s -S -riscv-promote-const -mtriple=riscv64 -mattr=+d | FileCheck %s + +; No promotion should take place, as the pass skips floats. +define float @multiple_floats(float %a, float %b) { +; CHECK-LABEL: define float @multiple_floats( +; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[A]], 1.000000e+00 +; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[B]], 2.000000e+00 +; CHECK-NEXT: [[SUM_F:%.*]] = fadd float [[ADD1]], [[ADD2]] +; CHECK-NEXT: ret float [[SUM_F]] +; +entry: + %add1 = fadd float %a, 1.0 + %add2 = fadd float %b, 2.0 + %sum_f = fadd float %add1, %add2 + ret float %sum_f +} + +; No promotion should take place as cases with a single constant are skipped. +define double @single_double(double %a) { +; CHECK-LABEL: define double @single_double( +; CHECK-SAME: double [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[A]], 4.210000e+01 +; CHECK-NEXT: ret double [[ADD]] +; +entry: + %add = fadd double %a, 42.1 + ret double %add +} + +; Promotion should happen as we have at least two unique constants that would +; otherwise go in the constant pool. +define double @multiple_doubles(double %a, double %b) { +; CHECK-LABEL: define double @multiple_doubles( +; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[DOUBLE_VAL1:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles, i64 0, i64 1), align 8 +; CHECK-NEXT: [[ADD3:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles, align 8 +; CHECK-NEXT: [[ADD2:%.*]] = fadd double [[A]], [[ADD3]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd double [[B]], [[DOUBLE_VAL1]] +; CHECK-NEXT: [[SUM:%.*]] = fadd double [[ADD2]], [[ADD3]] +; CHECK-NEXT: [[SUM1:%.*]] = fadd double [[ADD4]], [[SUM]] +; CHECK-NEXT: ret double [[SUM1]] +; +entry: + %add1 = fadd double %a, 2.718 + %add2 = fadd double %b, 42.1 + %add3 = fadd double %add1, 2.718 + %sum = fadd double %add2, %add3 + ret double %sum +} + +; Promotion should not happen as the constants will be materialised rather +; than using the constant pool. +define double @multiple_doubles_no_promote(double %a, double %b) { +; CHECK-LABEL: define double @multiple_doubles_no_promote( +; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[A]], 1.000000e+00 +; CHECK-NEXT: [[ADD2:%.*]] = fadd double [[B]], 2.000000e+00 +; CHECK-NEXT: [[ADD3:%.*]] = fadd double [[ADD1]], 1.000000e+00 +; CHECK-NEXT: [[SUM:%.*]] = fadd double [[ADD2]], [[ADD3]] +; CHECK-NEXT: ret double [[SUM]] +; +entry: + %add1 = fadd double %a, 1.0 + %add2 = fadd double %b, 2.0 + %add3 = fadd double %add1, 1.0 + %sum = fadd double %add2, %add3 + ret double %sum +} + +; The same constant shouldn't be loaded more than once per BB. +define double @multiple_doubles_multi_bb(double %a, i1 %cond) { +; CHECK-LABEL: define double @multiple_doubles_multi_bb( +; CHECK-SAME: double [[A:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[COND]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] +; CHECK: [[IF_TRUE]]: +; CHECK-NEXT: [[DOUBLE_VAL2:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_multi_bb, i64 0, i64 1), align 8 +; CHECK-NEXT: [[DOUBLE_VAL:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_multi_bb, align 8 +; CHECK-NEXT: [[ADD_T:%.*]] = fadd double [[A]], [[DOUBLE_VAL]] +; CHECK-NEXT: [[MUL_T:%.*]] = fmul double [[ADD_T]], [[DOUBLE_VAL2]] +; CHECK-NEXT: [[SUB_T:%.*]] = fsub double [[MUL_T]], [[DOUBLE_VAL]] +; CHECK-NEXT: br label %[[IF_END:.*]] +; CHECK: [[IF_FALSE]]: +; CHECK-NEXT: [[DOUBLE_VAL3:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_multi_bb, i64 0, i64 1), align 8 +; CHECK-NEXT: [[DOUBLE_VAL1:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_multi_bb, align 8 +; CHECK-NEXT: [[ADD_F:%.*]] = fadd double [[A]], [[DOUBLE_VAL1]] +; CHECK-NEXT: [[MUL_F:%.*]] = fmul double [[ADD_F]], [[DOUBLE_VAL3]] +; CHECK-NEXT: [[SUB_F:%.*]] = fsub double [[MUL_F]], [[DOUBLE_VAL1]] +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[PHI_RES:%.*]] = phi double [ [[SUB_T]], %[[IF_TRUE]] ], [ [[SUB_F]], %[[IF_FALSE]] ] +; CHECK-NEXT: ret double [[PHI_RES]] +; +entry: + br i1 %cond, label %if.true, label %if.false + +if.true: + %add.t = fadd double %a, 1.23 + %mul.t = fmul double %add.t, 4.56 + %sub.t = fsub double %mul.t, 1.23 + br label %if.end + +if.false: + %add.f = fadd double %a, 1.23 + %mul.f = fmul double %add.f, 4.56 + %sub.f = fsub double %mul.f, 1.23 + br label %if.end + +if.end: + %phi.res = phi double [ %sub.t, %if.true ], [ %sub.f, %if.false ] + ret double %phi.res +} + +; Check the insertion point in the case we have a phi taking a constant C and +; the source block also uses that same constant. +define double @multiple_doubles_phi(double %a, i1 %cond) { +; CHECK-LABEL: define double @multiple_doubles_phi( +; CHECK-SAME: double [[A:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 [[COND]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[DOUBLE_VAL:%.*]] = load double, ptr @.promoted_doubles.multiple_doubles_phi, align 8 +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[A]], [[DOUBLE_VAL]] +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: [[PHI_VAL:%.*]] = phi double [ [[DOUBLE_VAL]], %[[IF_THEN]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[DOUBLE_VAL1:%.*]] = load double, ptr getelementptr inbounds ([2 x double], ptr @.promoted_doubles.multiple_doubles_phi, i64 0, i64 1), align 8 +; CHECK-NEXT: [[RES:%.*]] = fadd double [[PHI_VAL]], [[DOUBLE_VAL1]] +; CHECK-NEXT: ret double [[RES]] +; +entry: + br i1 %cond, label %if.then, label %if.end + +if.then: + %mul = fmul double %a, 1.23 + br label %if.end + +if.end: + %phi.val = phi double [ 1.23, %if.then ], [ %a, %entry ] + %res = fadd double %phi.val, 4.56 + ret double %res +} From 3426f9c4811d7b710336bafe24e1930ac8f5d123 Mon Sep 17 00:00:00 2001 From: Chanho Lee Date: Wed, 5 Nov 2025 23:45:11 +0900 Subject: [PATCH 20/39] [clang][NFC] Rename stale TypeSourceInfo DI variables (#166082) Fixes #165346 This patch renames stale variable names where `TypeSourceInfo` objects were still using the old `DI` (`DeclaratorInfo`) naming convention. Specifically, variables of type `TypeSourceInfo` have been updated from `DI` to `TSI` to improve code clarity and maintain consistency with the current naming. --- clang/include/clang/Sema/Sema.h | 2 +- clang/lib/AST/ASTContext.cpp | 12 +- clang/lib/Sema/SemaDeclObjC.cpp | 10 +- clang/lib/Sema/SemaTemplate.cpp | 18 +- clang/lib/Sema/SemaTemplateDeductionGuide.cpp | 36 ++-- clang/lib/Sema/SemaTemplateInstantiate.cpp | 40 ++-- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 186 +++++++++--------- clang/lib/Sema/SemaType.cpp | 7 +- clang/lib/Sema/TreeTransform.h | 94 +++++---- clang/lib/Sema/TypeLocBuilder.h | 6 +- 10 files changed, 200 insertions(+), 211 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index c67ed99b1f49e..eb8d7d1112016 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11668,7 +11668,7 @@ class Sema final : public SemaBase { ASTTemplateArgsPtr TemplateArgsIn, SourceLocation RAngleLoc); DeclResult ActOnVarTemplateSpecialization( - Scope *S, Declarator &D, TypeSourceInfo *DI, LookupResult &Previous, + Scope *S, Declarator &D, TypeSourceInfo *TSI, LookupResult &Previous, SourceLocation TemplateKWLoc, TemplateParameterList *TemplateParams, StorageClass SC, bool IsPartialSpecialization); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 2669f62456711..fab907b9c1a40 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -3108,9 +3108,9 @@ TypeSourceInfo *ASTContext::CreateTypeSourceInfo(QualType T, TypeSourceInfo *ASTContext::getTrivialTypeSourceInfo(QualType T, SourceLocation L) const { - TypeSourceInfo *DI = CreateTypeSourceInfo(T); - DI->getTypeLoc().initialize(const_cast(*this), L); - return DI; + TypeSourceInfo *TSI = CreateTypeSourceInfo(T); + TSI->getTypeLoc().initialize(const_cast(*this), L); + return TSI; } const ASTRecordLayout & @@ -5891,11 +5891,11 @@ TypeSourceInfo *ASTContext::getTemplateSpecializationTypeInfo( QualType TST = getTemplateSpecializationType( Keyword, Name, SpecifiedArgs.arguments(), CanonicalArgs, Underlying); - TypeSourceInfo *DI = CreateTypeSourceInfo(TST); - DI->getTypeLoc().castAs().set( + TypeSourceInfo *TSI = CreateTypeSourceInfo(TST); + TSI->getTypeLoc().castAs().set( ElaboratedKeywordLoc, QualifierLoc, TemplateKeywordLoc, NameLoc, SpecifiedArgs); - return DI; + return TSI; } QualType ASTContext::getTemplateSpecializationType( diff --git a/clang/lib/Sema/SemaDeclObjC.cpp b/clang/lib/Sema/SemaDeclObjC.cpp index 3df9f9c1d68c7..53ff818a2af53 100644 --- a/clang/lib/Sema/SemaDeclObjC.cpp +++ b/clang/lib/Sema/SemaDeclObjC.cpp @@ -4730,13 +4730,13 @@ ParmVarDecl *SemaObjC::ActOnMethodParmDeclaration(Scope *S, bool MethodDefinition) { ASTContext &Context = getASTContext(); QualType ArgType; - TypeSourceInfo *DI; + TypeSourceInfo *TSI; if (!ArgInfo.Type) { ArgType = Context.getObjCIdType(); - DI = nullptr; + TSI = nullptr; } else { - ArgType = SemaRef.GetTypeFromParser(ArgInfo.Type, &DI); + ArgType = SemaRef.GetTypeFromParser(ArgInfo.Type, &TSI); } LookupResult R(SemaRef, ArgInfo.Name, ArgInfo.NameLoc, Sema::LookupOrdinaryName, @@ -4753,14 +4753,14 @@ ParmVarDecl *SemaObjC::ActOnMethodParmDeclaration(Scope *S, } } SourceLocation StartLoc = - DI ? DI->getTypeLoc().getBeginLoc() : ArgInfo.NameLoc; + TSI ? TSI->getTypeLoc().getBeginLoc() : ArgInfo.NameLoc; // Temporarily put parameter variables in the translation unit. This is what // ActOnParamDeclarator does in the case of C arguments to the Objective-C // method too. ParmVarDecl *Param = SemaRef.CheckParameter( Context.getTranslationUnitDecl(), StartLoc, ArgInfo.NameLoc, ArgInfo.Name, - ArgType, DI, SC_None); + ArgType, TSI, SC_None); Param->setObjCMethodScopeInfo(ParamIndex); Param->setObjCDeclQualifier( CvtQTToAstBitMask(ArgInfo.DeclSpec.getObjCDeclQualifier())); diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 2cc65935def53..983a7842ef450 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -949,11 +949,11 @@ static TemplateArgumentLoc translateTemplateArgument(Sema &SemaRef, switch (Arg.getKind()) { case ParsedTemplateArgument::Type: { - TypeSourceInfo *DI; - QualType T = SemaRef.GetTypeFromParser(Arg.getAsType(), &DI); - if (!DI) - DI = SemaRef.Context.getTrivialTypeSourceInfo(T, Arg.getNameLoc()); - return TemplateArgumentLoc(TemplateArgument(T), DI); + TypeSourceInfo *TSI; + QualType T = SemaRef.GetTypeFromParser(Arg.getAsType(), &TSI); + if (!TSI) + TSI = SemaRef.Context.getTrivialTypeSourceInfo(T, Arg.getNameLoc()); + return TemplateArgumentLoc(TemplateArgument(T), TSI); } case ParsedTemplateArgument::NonType: { @@ -4329,7 +4329,7 @@ void Sema::CheckDeductionGuideTemplate(FunctionTemplateDecl *TD) { } DeclResult Sema::ActOnVarTemplateSpecialization( - Scope *S, Declarator &D, TypeSourceInfo *DI, LookupResult &Previous, + Scope *S, Declarator &D, TypeSourceInfo *TSI, LookupResult &Previous, SourceLocation TemplateKWLoc, TemplateParameterList *TemplateParams, StorageClass SC, bool IsPartialSpecialization) { // D must be variable template id. @@ -4455,8 +4455,8 @@ DeclResult Sema::ActOnVarTemplateSpecialization( VarTemplatePartialSpecializationDecl *Partial = VarTemplatePartialSpecializationDecl::Create( Context, VarTemplate->getDeclContext(), TemplateKWLoc, - TemplateNameLoc, TemplateParams, VarTemplate, DI->getType(), DI, SC, - CTAI.CanonicalConverted); + TemplateNameLoc, TemplateParams, VarTemplate, TSI->getType(), TSI, + SC, CTAI.CanonicalConverted); Partial->setTemplateArgsAsWritten(TemplateArgs); if (!PrevPartial) @@ -4474,7 +4474,7 @@ DeclResult Sema::ActOnVarTemplateSpecialization( // this explicit specialization or friend declaration. Specialization = VarTemplateSpecializationDecl::Create( Context, VarTemplate->getDeclContext(), TemplateKWLoc, TemplateNameLoc, - VarTemplate, DI->getType(), DI, SC, CTAI.CanonicalConverted); + VarTemplate, TSI->getType(), TSI, SC, CTAI.CanonicalConverted); Specialization->setTemplateArgsAsWritten(TemplateArgs); if (!PrevDecl) diff --git a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp index bfcd3978817ca..40811d4c42e2a 100644 --- a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp +++ b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp @@ -632,34 +632,34 @@ struct ConvertConstructorToDeductionGuideTransform { ParmVarDecl *OldParam, MultiLevelTemplateArgumentList &Args, llvm::SmallVectorImpl &MaterializedTypedefs, bool TransformingOuterPatterns) { - TypeSourceInfo *OldDI = OldParam->getTypeSourceInfo(); - TypeSourceInfo *NewDI; - if (auto PackTL = OldDI->getTypeLoc().getAs()) { + TypeSourceInfo *OldTSI = OldParam->getTypeSourceInfo(); + TypeSourceInfo *NewTSI; + if (auto PackTL = OldTSI->getTypeLoc().getAs()) { // Expand out the one and only element in each inner pack. Sema::ArgPackSubstIndexRAII SubstIndex(SemaRef, 0u); - NewDI = + NewTSI = SemaRef.SubstType(PackTL.getPatternLoc(), Args, OldParam->getLocation(), OldParam->getDeclName()); - if (!NewDI) + if (!NewTSI) return nullptr; - NewDI = - SemaRef.CheckPackExpansion(NewDI, PackTL.getEllipsisLoc(), + NewTSI = + SemaRef.CheckPackExpansion(NewTSI, PackTL.getEllipsisLoc(), PackTL.getTypePtr()->getNumExpansions()); } else - NewDI = SemaRef.SubstType(OldDI, Args, OldParam->getLocation(), - OldParam->getDeclName()); - if (!NewDI) + NewTSI = SemaRef.SubstType(OldTSI, Args, OldParam->getLocation(), + OldParam->getDeclName()); + if (!NewTSI) return nullptr; // Extract the type. This (for instance) replaces references to typedef // members of the current instantiations with the definitions of those // typedefs, avoiding triggering instantiation of the deduced type during // deduction. - NewDI = ExtractTypeForDeductionGuide( - SemaRef, MaterializedTypedefs, NestedPattern, - TransformingOuterPatterns ? &Args : nullptr) - .transform(NewDI); - if (!NewDI) + NewTSI = ExtractTypeForDeductionGuide( + SemaRef, MaterializedTypedefs, NestedPattern, + TransformingOuterPatterns ? &Args : nullptr) + .transform(NewTSI); + if (!NewTSI) return nullptr; // Resolving a wording defect, we also inherit default arguments from the // constructor. @@ -667,7 +667,7 @@ struct ConvertConstructorToDeductionGuideTransform { if (OldParam->hasDefaultArg()) { // We don't care what the value is (we won't use it); just create a // placeholder to indicate there is a default argument. - QualType ParamTy = NewDI->getType(); + QualType ParamTy = NewTSI->getType(); NewDefArg = new (SemaRef.Context) OpaqueValueExpr(OldParam->getDefaultArgRange().getBegin(), ParamTy.getNonLValueExprType(SemaRef.Context), @@ -676,13 +676,13 @@ struct ConvertConstructorToDeductionGuideTransform { : VK_PRValue); } // Handle arrays and functions decay. - auto NewType = NewDI->getType(); + auto NewType = NewTSI->getType(); if (NewType->isArrayType() || NewType->isFunctionType()) NewType = SemaRef.Context.getDecayedType(NewType); ParmVarDecl *NewParam = ParmVarDecl::Create( SemaRef.Context, DC, OldParam->getInnerLocStart(), - OldParam->getLocation(), OldParam->getIdentifier(), NewType, NewDI, + OldParam->getLocation(), OldParam->getIdentifier(), NewType, NewTSI, OldParam->getStorageClass(), NewDefArg.get()); NewParam->setScopeInfo(OldParam->getFunctionScopeDepth(), OldParam->getFunctionScopeIndex()); diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 7f858050db13e..5fceacd0c00eb 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -3156,25 +3156,25 @@ Sema::SubstParmVarDecl(ParmVarDecl *OldParm, const MultiLevelTemplateArgumentList &TemplateArgs, int indexAdjustment, UnsignedOrNone NumExpansions, bool ExpectParameterPack, bool EvaluateConstraint) { - TypeSourceInfo *OldDI = OldParm->getTypeSourceInfo(); - TypeSourceInfo *NewDI = nullptr; + TypeSourceInfo *OldTSI = OldParm->getTypeSourceInfo(); + TypeSourceInfo *NewTSI = nullptr; - TypeLoc OldTL = OldDI->getTypeLoc(); + TypeLoc OldTL = OldTSI->getTypeLoc(); if (PackExpansionTypeLoc ExpansionTL = OldTL.getAs()) { // We have a function parameter pack. Substitute into the pattern of the // expansion. - NewDI = SubstType(ExpansionTL.getPatternLoc(), TemplateArgs, - OldParm->getLocation(), OldParm->getDeclName()); - if (!NewDI) + NewTSI = SubstType(ExpansionTL.getPatternLoc(), TemplateArgs, + OldParm->getLocation(), OldParm->getDeclName()); + if (!NewTSI) return nullptr; - if (NewDI->getType()->containsUnexpandedParameterPack()) { + if (NewTSI->getType()->containsUnexpandedParameterPack()) { // We still have unexpanded parameter packs, which means that // our function parameter is still a function parameter pack. // Therefore, make its type a pack expansion type. - NewDI = CheckPackExpansion(NewDI, ExpansionTL.getEllipsisLoc(), - NumExpansions); + NewTSI = CheckPackExpansion(NewTSI, ExpansionTL.getEllipsisLoc(), + NumExpansions); } else if (ExpectParameterPack) { // We expected to get a parameter pack but didn't (because the type // itself is not a pack expansion type), so complain. This can occur when @@ -3182,18 +3182,18 @@ Sema::SubstParmVarDecl(ParmVarDecl *OldParm, // pack expansion. Diag(OldParm->getLocation(), diag::err_function_parameter_pack_without_parameter_packs) - << NewDI->getType(); + << NewTSI->getType(); return nullptr; } } else { - NewDI = SubstType(OldDI, TemplateArgs, OldParm->getLocation(), - OldParm->getDeclName()); + NewTSI = SubstType(OldTSI, TemplateArgs, OldParm->getLocation(), + OldParm->getDeclName()); } - if (!NewDI) + if (!NewTSI) return nullptr; - if (NewDI->getType()->isVoidType()) { + if (NewTSI->getType()->isVoidType()) { Diag(OldParm->getLocation(), diag::err_param_with_void_type); return nullptr; } @@ -3205,7 +3205,7 @@ Sema::SubstParmVarDecl(ParmVarDecl *OldParm, // here, when the instantiated versions of those referenced parameters are in // scope. if (TemplateTypeParmDecl *TTP = - GetContainedInventedTypeParmVisitor().Visit(OldDI->getType())) { + GetContainedInventedTypeParmVisitor().Visit(OldTSI->getType())) { if (const TypeConstraint *TC = TTP->getTypeConstraint()) { auto *Inst = cast_or_null( FindInstantiatedDecl(TTP->getLocation(), TTP, TemplateArgs)); @@ -3219,12 +3219,10 @@ Sema::SubstParmVarDecl(ParmVarDecl *OldParm, } } - ParmVarDecl *NewParm = CheckParameter(Context.getTranslationUnitDecl(), - OldParm->getInnerLocStart(), - OldParm->getLocation(), - OldParm->getIdentifier(), - NewDI->getType(), NewDI, - OldParm->getStorageClass()); + ParmVarDecl *NewParm = CheckParameter( + Context.getTranslationUnitDecl(), OldParm->getInnerLocStart(), + OldParm->getLocation(), OldParm->getIdentifier(), NewTSI->getType(), + NewTSI, OldParm->getStorageClass()); if (!NewParm) return nullptr; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 28925cca8f956..681bfe0d8cbf8 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1506,17 +1506,17 @@ TemplateDeclInstantiator::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) { Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D, bool IsTypeAlias) { bool Invalid = false; - TypeSourceInfo *DI = D->getTypeSourceInfo(); - if (DI->getType()->isInstantiationDependentType() || - DI->getType()->isVariablyModifiedType()) { - DI = SemaRef.SubstType(DI, TemplateArgs, - D->getLocation(), D->getDeclName()); - if (!DI) { + TypeSourceInfo *TSI = D->getTypeSourceInfo(); + if (TSI->getType()->isInstantiationDependentType() || + TSI->getType()->isVariablyModifiedType()) { + TSI = SemaRef.SubstType(TSI, TemplateArgs, D->getLocation(), + D->getDeclName()); + if (!TSI) { Invalid = true; - DI = SemaRef.Context.getTrivialTypeSourceInfo(SemaRef.Context.IntTy); + TSI = SemaRef.Context.getTrivialTypeSourceInfo(SemaRef.Context.IntTy); } } else { - SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType()); + SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), TSI->getType()); } // HACK: 2012-10-23 g++ has a bug where it gets the value kind of ?: wrong. @@ -1525,7 +1525,7 @@ Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D, // semantics. See LWG issue 2141 for more information on the bug. The bugs // are fixed in g++ and libstdc++ 4.9.0 (2014-04-22). if (SemaRef.getPreprocessor().NeedsStdLibCxxWorkaroundBefore(2014'04'22)) { - const DecltypeType *DT = DI->getType()->getAs(); + const DecltypeType *DT = TSI->getType()->getAs(); CXXRecordDecl *RD = dyn_cast(D->getDeclContext()); if (DT && RD && isa(DT->getUnderlyingExpr()) && DT->isReferenceType() && @@ -1534,18 +1534,18 @@ Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D, D->getIdentifier() && D->getIdentifier()->isStr("type") && SemaRef.getSourceManager().isInSystemHeader(D->getBeginLoc())) // Fold it to the (non-reference) type which g++ would have produced. - DI = SemaRef.Context.getTrivialTypeSourceInfo( - DI->getType().getNonReferenceType()); + TSI = SemaRef.Context.getTrivialTypeSourceInfo( + TSI->getType().getNonReferenceType()); } // Create the new typedef TypedefNameDecl *Typedef; if (IsTypeAlias) Typedef = TypeAliasDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(), - D->getLocation(), D->getIdentifier(), DI); + D->getLocation(), D->getIdentifier(), TSI); else Typedef = TypedefDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(), - D->getLocation(), D->getIdentifier(), DI); + D->getLocation(), D->getIdentifier(), TSI); if (Invalid) Typedef->setInvalidDecl(); @@ -1554,7 +1554,7 @@ Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D, if (const TagType *oldTagType = D->getUnderlyingType()->getAs()) { TagDecl *oldTag = oldTagType->getDecl(); if (oldTag->getTypedefNameForAnonDecl() == D && !Invalid) { - TagDecl *newTag = DI->getType()->castAs()->getDecl(); + TagDecl *newTag = TSI->getType()->castAs()->getDecl(); assert(!newTag->hasNameForLinkage()); newTag->setTypedefNameForAnonDecl(Typedef); } @@ -1719,15 +1719,15 @@ Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D, ArrayRef *Bindings) { // Do substitution on the type of the declaration - TypeSourceInfo *DI = SemaRef.SubstType( + TypeSourceInfo *TSI = SemaRef.SubstType( D->getTypeSourceInfo(), TemplateArgs, D->getTypeSpecStartLoc(), - D->getDeclName(), /*AllowDeducedTST*/true); - if (!DI) + D->getDeclName(), /*AllowDeducedTST*/ true); + if (!TSI) return nullptr; - if (DI->getType()->isFunctionType()) { + if (TSI->getType()->isFunctionType()) { SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function) - << D->isStaticDataMember() << DI->getType(); + << D->isStaticDataMember() << TSI->getType(); return nullptr; } @@ -1739,12 +1739,12 @@ Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D, VarDecl *Var; if (Bindings) Var = DecompositionDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(), - D->getLocation(), DI->getType(), DI, + D->getLocation(), TSI->getType(), TSI, D->getStorageClass(), *Bindings); else Var = VarDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(), - D->getLocation(), D->getIdentifier(), DI->getType(), - DI, D->getStorageClass()); + D->getLocation(), D->getIdentifier(), TSI->getType(), + TSI, D->getStorageClass()); // In ARC, infer 'retaining' for variables of retainable type. if (SemaRef.getLangOpts().ObjCAutoRefCount && @@ -1810,15 +1810,15 @@ Decl *TemplateDeclInstantiator::VisitAccessSpecDecl(AccessSpecDecl *D) { Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) { bool Invalid = false; - TypeSourceInfo *DI = D->getTypeSourceInfo(); - if (DI->getType()->isInstantiationDependentType() || - DI->getType()->isVariablyModifiedType()) { - DI = SemaRef.SubstType(DI, TemplateArgs, - D->getLocation(), D->getDeclName()); - if (!DI) { - DI = D->getTypeSourceInfo(); + TypeSourceInfo *TSI = D->getTypeSourceInfo(); + if (TSI->getType()->isInstantiationDependentType() || + TSI->getType()->isVariablyModifiedType()) { + TSI = SemaRef.SubstType(TSI, TemplateArgs, D->getLocation(), + D->getDeclName()); + if (!TSI) { + TSI = D->getTypeSourceInfo(); Invalid = true; - } else if (DI->getType()->isFunctionType()) { + } else if (TSI->getType()->isFunctionType()) { // C++ [temp.arg.type]p3: // If a declaration acquires a function type through a type // dependent on a template-parameter and this causes a @@ -1826,11 +1826,11 @@ Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) { // function declarator to have function type, the program is // ill-formed. SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function) - << DI->getType(); + << TSI->getType(); Invalid = true; } } else { - SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType()); + SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), TSI->getType()); } Expr *BitWidth = D->getBitWidth(); @@ -1850,16 +1850,10 @@ Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) { BitWidth = InstantiatedBitWidth.getAs(); } - FieldDecl *Field = SemaRef.CheckFieldDecl(D->getDeclName(), - DI->getType(), DI, - cast(Owner), - D->getLocation(), - D->isMutable(), - BitWidth, - D->getInClassInitStyle(), - D->getInnerLocStart(), - D->getAccess(), - nullptr); + FieldDecl *Field = SemaRef.CheckFieldDecl( + D->getDeclName(), TSI->getType(), TSI, cast(Owner), + D->getLocation(), D->isMutable(), BitWidth, D->getInClassInitStyle(), + D->getInnerLocStart(), D->getAccess(), nullptr); if (!Field) { cast(Owner)->setInvalidDecl(); return nullptr; @@ -1892,19 +1886,19 @@ Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) { Decl *TemplateDeclInstantiator::VisitMSPropertyDecl(MSPropertyDecl *D) { bool Invalid = false; - TypeSourceInfo *DI = D->getTypeSourceInfo(); + TypeSourceInfo *TSI = D->getTypeSourceInfo(); - if (DI->getType()->isVariablyModifiedType()) { + if (TSI->getType()->isVariablyModifiedType()) { SemaRef.Diag(D->getLocation(), diag::err_property_is_variably_modified) << D; Invalid = true; - } else if (DI->getType()->isInstantiationDependentType()) { - DI = SemaRef.SubstType(DI, TemplateArgs, - D->getLocation(), D->getDeclName()); - if (!DI) { - DI = D->getTypeSourceInfo(); + } else if (TSI->getType()->isInstantiationDependentType()) { + TSI = SemaRef.SubstType(TSI, TemplateArgs, D->getLocation(), + D->getDeclName()); + if (!TSI) { + TSI = D->getTypeSourceInfo(); Invalid = true; - } else if (DI->getType()->isFunctionType()) { + } else if (TSI->getType()->isFunctionType()) { // C++ [temp.arg.type]p3: // If a declaration acquires a function type through a type // dependent on a template-parameter and this causes a @@ -1912,16 +1906,17 @@ Decl *TemplateDeclInstantiator::VisitMSPropertyDecl(MSPropertyDecl *D) { // function declarator to have function type, the program is // ill-formed. SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function) - << DI->getType(); + << TSI->getType(); Invalid = true; } } else { - SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType()); + SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), TSI->getType()); } MSPropertyDecl *Property = MSPropertyDecl::Create( - SemaRef.Context, Owner, D->getLocation(), D->getDeclName(), DI->getType(), - DI, D->getBeginLoc(), D->getGetterId(), D->getSetterId()); + SemaRef.Context, Owner, D->getLocation(), D->getDeclName(), + TSI->getType(), TSI, D->getBeginLoc(), D->getGetterId(), + D->getSetterId()); SemaRef.InstantiateAttrs(TemplateArgs, D, Property, LateAttrs, StartingScope); @@ -3584,7 +3579,7 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl( SmallVector ExpandedParameterPackTypesAsWritten; SmallVector ExpandedParameterPackTypes; bool IsExpandedParameterPack = false; - TypeSourceInfo *DI; + TypeSourceInfo *TSI; QualType T; bool Invalid = false; @@ -3594,24 +3589,24 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl( ExpandedParameterPackTypes.reserve(D->getNumExpansionTypes()); ExpandedParameterPackTypesAsWritten.reserve(D->getNumExpansionTypes()); for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) { - TypeSourceInfo *NewDI = + TypeSourceInfo *NewTSI = SemaRef.SubstType(D->getExpansionTypeSourceInfo(I), TemplateArgs, D->getLocation(), D->getDeclName()); - if (!NewDI) + if (!NewTSI) return nullptr; QualType NewT = - SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation()); + SemaRef.CheckNonTypeTemplateParameterType(NewTSI, D->getLocation()); if (NewT.isNull()) return nullptr; - ExpandedParameterPackTypesAsWritten.push_back(NewDI); + ExpandedParameterPackTypesAsWritten.push_back(NewTSI); ExpandedParameterPackTypes.push_back(NewT); } IsExpandedParameterPack = true; - DI = D->getTypeSourceInfo(); - T = DI->getType(); + TSI = D->getTypeSourceInfo(); + T = TSI->getType(); } else if (D->isPackExpansion()) { // The non-type template parameter pack's type is a pack expansion of types. // Determine whether we need to expand this parameter pack into separate @@ -3637,18 +3632,17 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl( if (Expand) { for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgPackSubstIndexRAII SubstIndex(SemaRef, I); - TypeSourceInfo *NewDI = SemaRef.SubstType(Pattern, TemplateArgs, - D->getLocation(), - D->getDeclName()); - if (!NewDI) + TypeSourceInfo *NewTSI = SemaRef.SubstType( + Pattern, TemplateArgs, D->getLocation(), D->getDeclName()); + if (!NewTSI) return nullptr; QualType NewT = - SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation()); + SemaRef.CheckNonTypeTemplateParameterType(NewTSI, D->getLocation()); if (NewT.isNull()) return nullptr; - ExpandedParameterPackTypesAsWritten.push_back(NewDI); + ExpandedParameterPackTypesAsWritten.push_back(NewTSI); ExpandedParameterPackTypes.push_back(NewT); } @@ -3656,8 +3650,8 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl( // expanded parameter pack is the original expansion type, but callers // will end up using the expanded parameter pack types for type-checking. IsExpandedParameterPack = true; - DI = D->getTypeSourceInfo(); - T = DI->getType(); + TSI = D->getTypeSourceInfo(); + T = TSI->getType(); } else { // We cannot fully expand the pack expansion now, so substitute into the // pattern and create a new pack expansion type. @@ -3669,22 +3663,22 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl( return nullptr; SemaRef.CheckNonTypeTemplateParameterType(NewPattern, D->getLocation()); - DI = SemaRef.CheckPackExpansion(NewPattern, Expansion.getEllipsisLoc(), - NumExpansions); - if (!DI) + TSI = SemaRef.CheckPackExpansion(NewPattern, Expansion.getEllipsisLoc(), + NumExpansions); + if (!TSI) return nullptr; - T = DI->getType(); + T = TSI->getType(); } } else { // Simple case: substitution into a parameter that is not a parameter pack. - DI = SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs, - D->getLocation(), D->getDeclName()); - if (!DI) + TSI = SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs, + D->getLocation(), D->getDeclName()); + if (!TSI) return nullptr; // Check that this type is acceptable for a non-type template parameter. - T = SemaRef.CheckNonTypeTemplateParameterType(DI, D->getLocation()); + T = SemaRef.CheckNonTypeTemplateParameterType(TSI, D->getLocation()); if (T.isNull()) { T = SemaRef.Context.IntTy; Invalid = true; @@ -3696,20 +3690,20 @@ Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl( Param = NonTypeTemplateParmDecl::Create( SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), - D->getPosition(), D->getIdentifier(), T, DI, ExpandedParameterPackTypes, - ExpandedParameterPackTypesAsWritten); + D->getPosition(), D->getIdentifier(), T, TSI, + ExpandedParameterPackTypes, ExpandedParameterPackTypesAsWritten); else Param = NonTypeTemplateParmDecl::Create( SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), - D->getPosition(), D->getIdentifier(), T, D->isParameterPack(), DI); + D->getPosition(), D->getIdentifier(), T, D->isParameterPack(), TSI); - if (AutoTypeLoc AutoLoc = DI->getTypeLoc().getContainedAutoTypeLoc()) + if (AutoTypeLoc AutoLoc = TSI->getTypeLoc().getContainedAutoTypeLoc()) if (AutoLoc.isConstrained()) { SourceLocation EllipsisLoc; if (IsExpandedParameterPack) EllipsisLoc = - DI->getTypeLoc().getAs().getEllipsisLoc(); + TSI->getTypeLoc().getAs().getEllipsisLoc(); else if (auto *Constraint = dyn_cast_if_present( D->getPlaceholderTypeConstraint())) EllipsisLoc = Constraint->getEllipsisLoc(); @@ -4642,22 +4636,22 @@ TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl( VarTemplateSpecializationDecl *PrevDecl) { // Do substitution on the type of the declaration - TypeSourceInfo *DI = + TypeSourceInfo *TSI = SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs, D->getTypeSpecStartLoc(), D->getDeclName()); - if (!DI) + if (!TSI) return nullptr; - if (DI->getType()->isFunctionType()) { + if (TSI->getType()->isFunctionType()) { SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function) - << D->isStaticDataMember() << DI->getType(); + << D->isStaticDataMember() << TSI->getType(); return nullptr; } // Build the instantiated declaration VarTemplateSpecializationDecl *Var = VarTemplateSpecializationDecl::Create( SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(), - VarTemplate, DI->getType(), DI, D->getStorageClass(), Converted); + VarTemplate, TSI->getType(), TSI, D->getStorageClass(), Converted); if (!PrevDecl) { void *InsertPos = nullptr; VarTemplate->findSpecialization(Converted, InsertPos); @@ -5005,16 +4999,16 @@ TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization( InstParams, InsertPos); // Do substitution on the type of the declaration - TypeSourceInfo *DI = SemaRef.SubstType( + TypeSourceInfo *TSI = SemaRef.SubstType( PartialSpec->getTypeSourceInfo(), TemplateArgs, PartialSpec->getTypeSpecStartLoc(), PartialSpec->getDeclName()); - if (!DI) + if (!TSI) return nullptr; - if (DI->getType()->isFunctionType()) { + if (TSI->getType()->isFunctionType()) { SemaRef.Diag(PartialSpec->getLocation(), diag::err_variable_instantiates_to_function) - << PartialSpec->isStaticDataMember() << DI->getType(); + << PartialSpec->isStaticDataMember() << TSI->getType(); return nullptr; } @@ -5022,8 +5016,8 @@ TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization( VarTemplatePartialSpecializationDecl *InstPartialSpec = VarTemplatePartialSpecializationDecl::Create( SemaRef.Context, Owner, PartialSpec->getInnerLocStart(), - PartialSpec->getLocation(), InstParams, VarTemplate, DI->getType(), - DI, PartialSpec->getStorageClass(), CTAI.CanonicalConverted); + PartialSpec->getLocation(), InstParams, VarTemplate, TSI->getType(), + TSI, PartialSpec->getStorageClass(), CTAI.CanonicalConverted); InstPartialSpec->setTemplateArgsAsWritten(InstTemplateArgs); @@ -6026,14 +6020,14 @@ VarTemplateSpecializationDecl *Sema::CompleteVarTemplateSpecializationDecl( "don't have a definition to instantiate from"); // Do substitution on the type of the declaration - TypeSourceInfo *DI = + TypeSourceInfo *TSI = SubstType(PatternDecl->getTypeSourceInfo(), TemplateArgs, PatternDecl->getTypeSpecStartLoc(), PatternDecl->getDeclName()); - if (!DI) + if (!TSI) return nullptr; // Update the type of this variable template specialization. - VarSpec->setType(DI->getType()); + VarSpec->setType(TSI->getType()); // Convert the declaration into a definition now. VarSpec->setCompleteDefinition(); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index c483930705057..eb8b1352d1be1 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -2795,13 +2795,14 @@ QualType Sema::GetTypeFromParser(ParsedType Ty, TypeSourceInfo **TInfo) { return QualType(); } - TypeSourceInfo *DI = nullptr; + TypeSourceInfo *TSI = nullptr; if (const LocInfoType *LIT = dyn_cast(QT)) { QT = LIT->getType(); - DI = LIT->getTypeSourceInfo(); + TSI = LIT->getTypeSourceInfo(); } - if (TInfo) *TInfo = DI; + if (TInfo) + *TInfo = TSI; return QT; } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 8c20078e97a13..dffd7c1def8e2 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -371,7 +371,7 @@ class TreeTransform { /// may override this function (to take over all type /// transformations) or some set of the TransformXXXType functions /// to alter the transformation. - TypeSourceInfo *TransformType(TypeSourceInfo *DI); + TypeSourceInfo *TransformType(TypeSourceInfo *TSI); /// Transform the given type-with-location into a new /// type, collecting location information in the given builder @@ -387,7 +387,7 @@ class TreeTransform { /// template arguments. /// @{ QualType TransformTypeWithDeducedTST(QualType T); - TypeSourceInfo *TransformTypeWithDeducedTST(TypeSourceInfo *DI); + TypeSourceInfo *TransformTypeWithDeducedTST(TypeSourceInfo *TSI); /// @} /// The reason why the value of a statement is not discarded, if any. @@ -4995,15 +4995,15 @@ bool TreeTransform::TransformTemplateArgument( } case TemplateArgument::Type: { - TypeSourceInfo *DI = Input.getTypeSourceInfo(); - if (!DI) - DI = InventTypeSourceInfo(Input.getArgument().getAsType()); + TypeSourceInfo *TSI = Input.getTypeSourceInfo(); + if (!TSI) + TSI = InventTypeSourceInfo(Input.getArgument().getAsType()); - DI = getDerived().TransformType(DI); - if (!DI) + TSI = getDerived().TransformType(TSI); + if (!TSI) return true; - Output = TemplateArgumentLoc(TemplateArgument(DI->getType()), DI); + Output = TemplateArgumentLoc(TemplateArgument(TSI->getType()), TSI); return false; } @@ -5360,28 +5360,28 @@ QualType TreeTransform::TransformType(QualType T) { // Temporary workaround. All of these transformations should // eventually turn into transformations on TypeLocs. - TypeSourceInfo *DI = getSema().Context.getTrivialTypeSourceInfo(T, - getDerived().getBaseLocation()); + TypeSourceInfo *TSI = getSema().Context.getTrivialTypeSourceInfo( + T, getDerived().getBaseLocation()); - TypeSourceInfo *NewDI = getDerived().TransformType(DI); + TypeSourceInfo *NewTSI = getDerived().TransformType(TSI); - if (!NewDI) + if (!NewTSI) return QualType(); - return NewDI->getType(); + return NewTSI->getType(); } -template -TypeSourceInfo *TreeTransform::TransformType(TypeSourceInfo *DI) { +template +TypeSourceInfo *TreeTransform::TransformType(TypeSourceInfo *TSI) { // Refine the base location to the type's location. - TemporaryBase Rebase(*this, DI->getTypeLoc().getBeginLoc(), + TemporaryBase Rebase(*this, TSI->getTypeLoc().getBeginLoc(), getDerived().getBaseEntity()); - if (getDerived().AlreadyTransformed(DI->getType())) - return DI; + if (getDerived().AlreadyTransformed(TSI->getType())) + return TSI; TypeLocBuilder TLB; - TypeLoc TL = DI->getTypeLoc(); + TypeLoc TL = TSI->getTypeLoc(); TLB.reserve(TL.getFullDataSize()); QualType Result = getDerived().TransformType(TLB, TL); @@ -5413,27 +5413,27 @@ QualType TreeTransform::TransformTypeWithDeducedTST(QualType T) { if (getDerived().AlreadyTransformed(T)) return T; - TypeSourceInfo *DI = getSema().Context.getTrivialTypeSourceInfo(T, - getDerived().getBaseLocation()); - TypeSourceInfo *NewDI = getDerived().TransformTypeWithDeducedTST(DI); - return NewDI ? NewDI->getType() : QualType(); + TypeSourceInfo *TSI = getSema().Context.getTrivialTypeSourceInfo( + T, getDerived().getBaseLocation()); + TypeSourceInfo *NewTSI = getDerived().TransformTypeWithDeducedTST(TSI); + return NewTSI ? NewTSI->getType() : QualType(); } -template +template TypeSourceInfo * -TreeTransform::TransformTypeWithDeducedTST(TypeSourceInfo *DI) { - if (!isa(DI->getType())) - return TransformType(DI); +TreeTransform::TransformTypeWithDeducedTST(TypeSourceInfo *TSI) { + if (!isa(TSI->getType())) + return TransformType(TSI); // Refine the base location to the type's location. - TemporaryBase Rebase(*this, DI->getTypeLoc().getBeginLoc(), + TemporaryBase Rebase(*this, TSI->getTypeLoc().getBeginLoc(), getDerived().getBaseEntity()); - if (getDerived().AlreadyTransformed(DI->getType())) - return DI; + if (getDerived().AlreadyTransformed(TSI->getType())) + return TSI; TypeLocBuilder TLB; - TypeLoc TL = DI->getTypeLoc(); + TypeLoc TL = TSI->getTypeLoc(); TLB.reserve(TL.getFullDataSize()); auto QTL = TL.getAs(); @@ -6258,17 +6258,17 @@ template ParmVarDecl *TreeTransform::TransformFunctionTypeParam( ParmVarDecl *OldParm, int indexAdjustment, UnsignedOrNone NumExpansions, bool ExpectParameterPack) { - TypeSourceInfo *OldDI = OldParm->getTypeSourceInfo(); - TypeSourceInfo *NewDI = nullptr; + TypeSourceInfo *OldTSI = OldParm->getTypeSourceInfo(); + TypeSourceInfo *NewTSI = nullptr; - if (NumExpansions && isa(OldDI->getType())) { + if (NumExpansions && isa(OldTSI->getType())) { // If we're substituting into a pack expansion type and we know the // length we want to expand to, just substitute for the pattern. - TypeLoc OldTL = OldDI->getTypeLoc(); + TypeLoc OldTL = OldTSI->getTypeLoc(); PackExpansionTypeLoc OldExpansionTL = OldTL.castAs(); TypeLocBuilder TLB; - TypeLoc NewTL = OldDI->getTypeLoc(); + TypeLoc NewTL = OldTSI->getTypeLoc(); TLB.reserve(NewTL.getFullDataSize()); QualType Result = getDerived().TransformType(TLB, @@ -6286,24 +6286,20 @@ ParmVarDecl *TreeTransform::TransformFunctionTypeParam( PackExpansionTypeLoc NewExpansionTL = TLB.push(Result); NewExpansionTL.setEllipsisLoc(OldExpansionTL.getEllipsisLoc()); - NewDI = TLB.getTypeSourceInfo(SemaRef.Context, Result); + NewTSI = TLB.getTypeSourceInfo(SemaRef.Context, Result); } else - NewDI = getDerived().TransformType(OldDI); - if (!NewDI) + NewTSI = getDerived().TransformType(OldTSI); + if (!NewTSI) return nullptr; - if (NewDI == OldDI && indexAdjustment == 0) + if (NewTSI == OldTSI && indexAdjustment == 0) return OldParm; - ParmVarDecl *newParm = ParmVarDecl::Create(SemaRef.Context, - OldParm->getDeclContext(), - OldParm->getInnerLocStart(), - OldParm->getLocation(), - OldParm->getIdentifier(), - NewDI->getType(), - NewDI, - OldParm->getStorageClass(), - /* DefArg */ nullptr); + ParmVarDecl *newParm = ParmVarDecl::Create( + SemaRef.Context, OldParm->getDeclContext(), OldParm->getInnerLocStart(), + OldParm->getLocation(), OldParm->getIdentifier(), NewTSI->getType(), + NewTSI, OldParm->getStorageClass(), + /* DefArg */ nullptr); newParm->setScopeInfo(OldParm->getFunctionScopeDepth(), OldParm->getFunctionScopeIndex() + indexAdjustment); getDerived().transformedLocalDecl(OldParm, {newParm}); diff --git a/clang/lib/Sema/TypeLocBuilder.h b/clang/lib/Sema/TypeLocBuilder.h index 0c27088a1748b..e84e79aee8f0d 100644 --- a/clang/lib/Sema/TypeLocBuilder.h +++ b/clang/lib/Sema/TypeLocBuilder.h @@ -113,9 +113,9 @@ class TypeLocBuilder { #endif size_t FullDataSize = Capacity - Index; - TypeSourceInfo *DI = Context.CreateTypeSourceInfo(T, FullDataSize); - memcpy(DI->getTypeLoc().getOpaqueData(), &Buffer[Index], FullDataSize); - return DI; + TypeSourceInfo *TSI = Context.CreateTypeSourceInfo(T, FullDataSize); + memcpy(TSI->getTypeLoc().getOpaqueData(), &Buffer[Index], FullDataSize); + return TSI; } /// Copies the type-location information to the given AST context and From 63d6e3eb4664c150dcb0c8cb611b30fb6307dbc4 Mon Sep 17 00:00:00 2001 From: Santanu Das Date: Wed, 5 Nov 2025 20:26:00 +0530 Subject: [PATCH 21/39] [DebugInfo] Assign best possible debugloc to bundle (#164573) The debug info attached to the BUNDLE is the first instruction in the BUNDLE, even if a better debug info (line:column) is present in the later instructions of the bundle. The patch tries to get a better debug info first. If not, then a worse debug info without line number is chosen. --------- Co-authored-by: Vladislav Dzhidzhoev Co-authored-by: Orlando Cazalet-Hyams --- llvm/lib/CodeGen/MachineInstrBundle.cpp | 20 +++++--- llvm/test/DebugInfo/Hexagon/lit.local.cfg | 2 + llvm/test/DebugInfo/Hexagon/packet-debug.mir | 48 ++++++++++++++++++++ 3 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 llvm/test/DebugInfo/Hexagon/lit.local.cfg create mode 100644 llvm/test/DebugInfo/Hexagon/packet-debug.mir diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp index 88d81993fbe55..f4c1a8bf339c8 100644 --- a/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -83,15 +83,21 @@ llvm::createUnpackMachineBundles( return new UnpackMachineBundles(std::move(Ftor)); } -/// Return the first found DebugLoc that has a DILocation, given a range of -/// instructions. The search range is from FirstMI to LastMI (exclusive). If no -/// DILocation is found, then an empty location is returned. +/// Return the first DebugLoc that has line number information, given a +/// range of instructions. The search range is from FirstMI to LastMI +/// (exclusive). Otherwise return the first DILocation or an empty location if +/// there are none. static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI) { - for (auto MII = FirstMI; MII != LastMI; ++MII) - if (MII->getDebugLoc()) - return MII->getDebugLoc(); - return DebugLoc(); + DebugLoc DL; + for (auto MII = FirstMI; MII != LastMI; ++MII) { + if (DebugLoc MIIDL = MII->getDebugLoc()) { + if (MIIDL.getLine() != 0) + return MIIDL; + DL = MIIDL.get(); + } + } + return DL; } /// Check if target reg is contained in given lists, which are: diff --git a/llvm/test/DebugInfo/Hexagon/lit.local.cfg b/llvm/test/DebugInfo/Hexagon/lit.local.cfg new file mode 100644 index 0000000000000..3bed54b1a88d2 --- /dev/null +++ b/llvm/test/DebugInfo/Hexagon/lit.local.cfg @@ -0,0 +1,2 @@ +if not "Hexagon" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/DebugInfo/Hexagon/packet-debug.mir b/llvm/test/DebugInfo/Hexagon/packet-debug.mir new file mode 100644 index 0000000000000..485b543b6e176 --- /dev/null +++ b/llvm/test/DebugInfo/Hexagon/packet-debug.mir @@ -0,0 +1,48 @@ +# RUN: llc -mtriple=hexagon -run-pass hexagon-packetizer %s -o - | FileCheck %s + +# CHECK-LABEL: name: factorial + +# The first bundle in bb.0 should have debug-location !19 (line 9), +# not !18 (line 0) from the DBG_VALUE instructions. +# CHECK: bb.0: +# CHECK: BUNDLE {{.*}}line: 9 + +--- | + define void @factorial() { ret void } + + !llvm.dbg.cu = !{!2} + !llvm.module.flags = !{!6, !7} + + !2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "test", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) + !3 = !DIFile(filename: "fact.c", directory: "/test") + !5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !6 = !{i32 2, !"Debug Info Version", i32 3} + !7 = !{i32 1, !"wchar_size", i32 4} + !12 = distinct !DISubprogram(name: "factorial", scope: !3, file: !3, line: 6, type: !13, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) + !13 = !DISubroutineType(types: !14) + !14 = !{!5, !5} + !16 = !DILocalVariable(name: "i", arg: 1, scope: !12, file: !3, line: 6, type: !5) + !18 = !DILocation(line: 0, scope: !12) + !19 = !DILocation(line: 9, column: 9, scope: !12) + !21 = !DILocation(line: 9, column: 7, scope: !12) + +... +--- +name: factorial +alignment: 16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r0 + + DBG_VALUE $r0, $noreg, !16, !DIExpression(), debug-location !18 + $r2 = A2_tfr $r0 + DBG_VALUE $r2, $noreg, !16, !DIExpression(), debug-location !18 + renamable $p0 = C2_cmpeqi killed $r0, 1, debug-location !19 + renamable $r0 = A2_tfrsi 1 + J2_jumpt killed $p0, %bb.1, implicit-def $pc, debug-location !21 + + bb.1: + PS_jmpret $r31, implicit-def dead $pc + +... From 14c76437ee1ae5ae2e641eb271988939163b8981 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 5 Nov 2025 16:02:19 +0100 Subject: [PATCH 22/39] [clang][bytecode] Check types when loading values (#165385) We need to allow BitCasts between pointer types to different prim types, but that means we need to catch the problem at a later stage, i.e. when loading the values. Fixes https://github.com/llvm/llvm-project/issues/158527 Fixes https://github.com/llvm/llvm-project/issues/163778 --- clang/lib/AST/ByteCode/Compiler.cpp | 20 +++++++++++- clang/lib/AST/ByteCode/Compiler.h | 2 ++ clang/lib/AST/ByteCode/Interp.h | 27 +++++++++++----- clang/lib/AST/ByteCode/PrimType.h | 4 +++ clang/lib/AST/ByteCode/Program.cpp | 50 +++++++---------------------- clang/test/AST/ByteCode/invalid.cpp | 23 +++++++++++++ 6 files changed, 78 insertions(+), 48 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 6c088469a3ca2..4e634000adc3b 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -208,6 +208,19 @@ template class LocOverrideScope final { } // namespace interp } // namespace clang +template +bool Compiler::isValidBitCast(const CastExpr *E) { + QualType FromTy = E->getSubExpr()->getType()->getPointeeType(); + QualType ToTy = E->getType()->getPointeeType(); + + if (classify(FromTy) == classify(ToTy)) + return true; + + if (FromTy->isVoidType() || ToTy->isVoidType()) + return true; + return false; +} + template bool Compiler::VisitCastExpr(const CastExpr *CE) { const Expr *SubExpr = CE->getSubExpr(); @@ -476,8 +489,9 @@ bool Compiler::VisitCastExpr(const CastExpr *CE) { return this->delegate(SubExpr); case CK_BitCast: { + QualType CETy = CE->getType(); // Reject bitcasts to atomic types. - if (CE->getType()->isAtomicType()) { + if (CETy->isAtomicType()) { if (!this->discard(SubExpr)) return false; return this->emitInvalidCast(CastKind::Reinterpret, /*Fatal=*/true, CE); @@ -492,6 +506,10 @@ bool Compiler::VisitCastExpr(const CastExpr *CE) { if (!FromT || !ToT) return false; + if (!this->isValidBitCast(CE) && + !this->emitInvalidCast(CastKind::ReinterpretLike, /*Fatal=*/false, CE)) + return false; + assert(isPtrType(*FromT)); assert(isPtrType(*ToT)); if (FromT == ToT) { diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index 5c46f75af4da3..fac0a7f4e1886 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -425,6 +425,8 @@ class Compiler : public ConstStmtVisitor, bool>, bool refersToUnion(const Expr *E); + bool isValidBitCast(const CastExpr *E); + protected: /// Variable to storage mapping. llvm::DenseMap Locals; diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 5ab9c8ee75a51..6877b03f5916b 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -1915,6 +1915,9 @@ bool Load(InterpState &S, CodePtr OpPC) { return false; if (!Ptr.isBlockPointer()) return false; + if (const Descriptor *D = Ptr.getFieldDesc(); + !(D->isPrimitive() || D->isPrimitiveArray()) || D->getPrimType() != Name) + return false; S.Stk.push(Ptr.deref()); return true; } @@ -1926,6 +1929,9 @@ bool LoadPop(InterpState &S, CodePtr OpPC) { return false; if (!Ptr.isBlockPointer()) return false; + if (const Descriptor *D = Ptr.getFieldDesc(); + !(D->isPrimitive() || D->isPrimitiveArray()) || D->getPrimType() != Name) + return false; S.Stk.push(Ptr.deref()); return true; } @@ -3288,12 +3294,18 @@ inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind, bool Fatal) { const SourceLocation &Loc = S.Current->getLocation(OpPC); - if (Kind == CastKind::Reinterpret) { + switch (Kind) { + case CastKind::Reinterpret: S.CCEDiag(Loc, diag::note_constexpr_invalid_cast) - << static_cast(Kind) << S.Current->getRange(OpPC); + << diag::ConstexprInvalidCastKind::Reinterpret + << S.Current->getRange(OpPC); return !Fatal; - } - if (Kind == CastKind::Volatile) { + case CastKind::ReinterpretLike: + S.CCEDiag(Loc, diag::note_constexpr_invalid_cast) + << diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret + << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC); + return !Fatal; + case CastKind::Volatile: if (!S.checkingPotentialConstantExpression()) { const auto *E = cast(S.Current->getExpr(OpPC)); if (S.getLangOpts().CPlusPlus) @@ -3304,14 +3316,13 @@ inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind, } return false; - } - if (Kind == CastKind::Dynamic) { + case CastKind::Dynamic: assert(!S.getLangOpts().CPlusPlus20); - S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast) + S.CCEDiag(Loc, diag::note_constexpr_invalid_cast) << diag::ConstexprInvalidCastKind::Dynamic; return true; } - + llvm_unreachable("Unhandled CastKind"); return false; } diff --git a/clang/lib/AST/ByteCode/PrimType.h b/clang/lib/AST/ByteCode/PrimType.h index 54fd39ac6fcc8..f0454b484ff98 100644 --- a/clang/lib/AST/ByteCode/PrimType.h +++ b/clang/lib/AST/ByteCode/PrimType.h @@ -101,6 +101,7 @@ inline constexpr bool isSignedType(PrimType T) { enum class CastKind : uint8_t { Reinterpret, + ReinterpretLike, Volatile, Dynamic, }; @@ -111,6 +112,9 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, case interp::CastKind::Reinterpret: OS << "reinterpret_cast"; break; + case interp::CastKind::ReinterpretLike: + OS << "reinterpret_like"; + break; case interp::CastKind::Volatile: OS << "volatile"; break; diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp index 2425373ab2ef8..4d34e0b0a9b46 100644 --- a/clang/lib/AST/ByteCode/Program.cpp +++ b/clang/lib/AST/ByteCode/Program.cpp @@ -36,30 +36,19 @@ unsigned Program::createGlobalString(const StringLiteral *S, const Expr *Base) { const size_t BitWidth = CharWidth * Ctx.getCharBit(); unsigned StringLength = S->getLength(); - PrimType CharType; - switch (CharWidth) { - case 1: - CharType = PT_Sint8; - break; - case 2: - CharType = PT_Uint16; - break; - case 4: - CharType = PT_Uint32; - break; - default: - llvm_unreachable("unsupported character width"); - } + OptPrimType CharType = + Ctx.classify(S->getType()->castAsArrayTypeUnsafe()->getElementType()); + assert(CharType); if (!Base) Base = S; // Create a descriptor for the string. - Descriptor *Desc = - allocateDescriptor(Base, CharType, Descriptor::GlobalMD, StringLength + 1, - /*isConst=*/true, - /*isTemporary=*/false, - /*isMutable=*/false); + Descriptor *Desc = allocateDescriptor(Base, *CharType, Descriptor::GlobalMD, + StringLength + 1, + /*isConst=*/true, + /*isTemporary=*/false, + /*isMutable=*/false); // Allocate storage for the string. // The byte length does not include the null terminator. @@ -79,26 +68,9 @@ unsigned Program::createGlobalString(const StringLiteral *S, const Expr *Base) { } else { // Construct the string in storage. for (unsigned I = 0; I <= StringLength; ++I) { - const uint32_t CodePoint = I == StringLength ? 0 : S->getCodeUnit(I); - switch (CharType) { - case PT_Sint8: { - using T = PrimConv::T; - Ptr.elem(I) = T::from(CodePoint, BitWidth); - break; - } - case PT_Uint16: { - using T = PrimConv::T; - Ptr.elem(I) = T::from(CodePoint, BitWidth); - break; - } - case PT_Uint32: { - using T = PrimConv::T; - Ptr.elem(I) = T::from(CodePoint, BitWidth); - break; - } - default: - llvm_unreachable("unsupported character type"); - } + uint32_t CodePoint = I == StringLength ? 0 : S->getCodeUnit(I); + INT_TYPE_SWITCH_NO_BOOL(*CharType, + Ptr.elem(I) = T::from(CodePoint, BitWidth);); } } Ptr.initializeAllElements(); diff --git a/clang/test/AST/ByteCode/invalid.cpp b/clang/test/AST/ByteCode/invalid.cpp index 00db27419e36b..1f2d6bc1d48eb 100644 --- a/clang/test/AST/ByteCode/invalid.cpp +++ b/clang/test/AST/ByteCode/invalid.cpp @@ -66,3 +66,26 @@ struct S { S s; S *sp[2] = {&s, &s}; S *&spp = sp[1]; + +namespace InvalidBitCast { + void foo() { + const long long int i = 1; // both-note {{declared const here}} + if (*(double *)&i == 2) { + i = 0; // both-error {{cannot assign to variable}} + } + } + + struct S2 { + void *p; + }; + struct T { + S2 s; + }; + constexpr T t = {{nullptr}}; + constexpr void *foo2() { return ((void **)&t)[0]; } // both-error {{never produces a constant expression}} \ + // both-note 2{{cast that performs the conversions of a reinterpret_cast}} + constexpr auto x = foo2(); // both-error {{must be initialized by a constant expression}} \ + // both-note {{in call to}} + + +} From ad8f6b44be142eb943942f7760e520a36d237d48 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 5 Nov 2025 07:09:02 -0800 Subject: [PATCH 23/39] DAG: Avoid some libcall string name comparisons (#166321) Move to the libcall impl based functions. --- llvm/include/llvm/CodeGen/TargetLowering.h | 5 +-- llvm/include/llvm/IR/RuntimeLibcalls.h | 6 ++-- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 2 +- llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 2 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 31 +++++++++++-------- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 78f63b4406eb0..b229659415d55 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3620,10 +3620,7 @@ class LLVM_ABI TargetLoweringBase { return RTLIB::RuntimeLibcallsInfo::getLibcallImplName(Call); } - const char *getMemcpyName() const { - // FIXME: Return StringRef - return Libcalls.getMemcpyName().data(); - } + RTLIB::LibcallImpl getMemcpyImpl() const { return Libcalls.getMemcpyImpl(); } /// Check if this is valid libcall for the current module, otherwise /// RTLIB::Unsupported. diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index ab14ed44fed52..bae760b3f981d 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -134,13 +134,13 @@ struct RuntimeLibcallsInfo { /// Return a function name compatible with RTLIB::MEMCPY, or nullptr if fully /// unsupported. - StringRef getMemcpyName() const { + RTLIB::LibcallImpl getMemcpyImpl() const { RTLIB::LibcallImpl Memcpy = getLibcallImpl(RTLIB::MEMCPY); if (Memcpy != RTLIB::Unsupported) - return getLibcallImplName(Memcpy); + return Memcpy; // Fallback to memmove if memcpy isn't available. - return getLibcallName(RTLIB::MEMMOVE); + return getLibcallImpl(RTLIB::MEMMOVE); } bool isAvailable(RTLIB::LibcallImpl Impl) const { diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 52c43a4ac4a04..d02f097fef829 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -776,7 +776,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, break; case TargetOpcode::G_MEMCPY: RTLibcall = RTLIB::MEMCPY; - Name = TLI.getMemcpyName(); + Name = TLI.getLibcallImplName(TLI.getMemcpyImpl()).data(); Args[0].Flags[0].setReturned(); break; case TargetOpcode::G_MEMMOVE: diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 620d3d3d02daa..d738dc4eea36d 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -244,7 +244,7 @@ static bool canEmitMemcpy(const TargetMachine *TM, Function *F) { if (!TM) return true; const TargetLowering *TLI = TM->getSubtargetImpl(*F)->getTargetLowering(); - return TLI->getMemcpyName() != nullptr; + return TLI->getMemcpyImpl() != RTLIB::Unsupported; } // Return a value appropriate for use with the memset_pattern16 libcall, if diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 379242ec5a157..ff6a7b2cf5462 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -9257,21 +9257,22 @@ SDValue SelectionDAG::getMemcpy( // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); bool IsTailCall = false; - const char *MemCpyName = TLI->getMemcpyName(); + RTLIB::LibcallImpl MemCpyImpl = TLI->getMemcpyImpl(); if (OverrideTailCall.has_value()) { IsTailCall = *OverrideTailCall; } else { - bool LowersToMemcpy = StringRef(MemCpyName) == StringRef("memcpy"); + bool LowersToMemcpy = MemCpyImpl == RTLIB::impl_memcpy; IsTailCall = isInTailCallPositionWrapper(CI, this, LowersToMemcpy); } CLI.setDebugLoc(dl) .setChain(Chain) .setLibCallee( - TLI->getLibcallCallingConv(RTLIB::MEMCPY), + TLI->getLibcallImplCallingConv(MemCpyImpl), Dst.getValueType().getTypeForEVT(*getContext()), - getExternalSymbol(MemCpyName, TLI->getPointerTy(getDataLayout())), + getExternalSymbol(TLI->getLibcallImplName(MemCpyImpl).data(), + TLI->getPointerTy(getDataLayout())), std::move(Args)) .setDiscardResult() .setTailCall(IsTailCall); @@ -9361,22 +9362,24 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); + RTLIB::LibcallImpl MemmoveImpl = TLI->getLibcallImpl(RTLIB::MEMMOVE); + bool IsTailCall = false; if (OverrideTailCall.has_value()) { IsTailCall = *OverrideTailCall; } else { - bool LowersToMemmove = - TLI->getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove"); + bool LowersToMemmove = MemmoveImpl == RTLIB::impl_memmove; IsTailCall = isInTailCallPositionWrapper(CI, this, LowersToMemmove); } CLI.setDebugLoc(dl) .setChain(Chain) - .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), - Dst.getValueType().getTypeForEVT(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) + .setLibCallee( + TLI->getLibcallImplCallingConv(MemmoveImpl), + Dst.getValueType().getTypeForEVT(*getContext()), + getExternalSymbol(TLI->getLibcallImplName(MemmoveImpl).data(), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) .setDiscardResult() .setTailCall(IsTailCall); @@ -9492,8 +9495,10 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, TLI->getPointerTy(DL)), std::move(Args)); } - bool LowersToMemset = - TLI->getLibcallName(RTLIB::MEMSET) == StringRef("memset"); + + RTLIB::LibcallImpl MemsetImpl = TLI->getLibcallImpl(RTLIB::MEMSET); + bool LowersToMemset = MemsetImpl == RTLIB::impl_memset; + // If we're going to use bzero, make sure not to tail call unless the // subsequent return doesn't need a value, as bzero doesn't return the first // arg unlike memset. From 3a84aef64a1992c75b5638c9475397b6cf24a186 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 5 Nov 2025 10:24:16 -0500 Subject: [PATCH 24/39] [PowerPC][NFC] auto gen checks vec rounding tests (#166435) Update tests to contain auto generated checks. --- llvm/test/CodeGen/PowerPC/vec_rounding.ll | 195 +++++++++++++++------- 1 file changed, 137 insertions(+), 58 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/vec_rounding.ll b/llvm/test/CodeGen/PowerPC/vec_rounding.ll index 2f16a435440ff..438c8ebdc099e 100644 --- a/llvm/test/CodeGen/PowerPC/vec_rounding.ll +++ b/llvm/test/CodeGen/PowerPC/vec_rounding.ll @@ -1,172 +1,251 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s ; Check vector round to single-precision toward -infinity (vrfim) ; instruction generation using Altivec. -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - declare <2 x double> @llvm.floor.v2f64(<2 x double> %p) define <2 x double> @floor_v2f64(<2 x double> %p) +; CHECK-LABEL: floor_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frim 1, 1 +; CHECK-NEXT: frim 2, 2 +; CHECK-NEXT: blr { %t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: floor_v2f64: -; CHECK: frim -; CHECK: frim declare <4 x double> @llvm.floor.v4f64(<4 x double> %p) define <4 x double> @floor_v4f64(<4 x double> %p) +; CHECK-LABEL: floor_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frim 1, 1 +; CHECK-NEXT: frim 2, 2 +; CHECK-NEXT: frim 3, 3 +; CHECK-NEXT: frim 4, 4 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: floor_v4f64: -; CHECK: frim -; CHECK: frim -; CHECK: frim -; CHECK: frim declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p) define <2 x double> @ceil_v2f64(<2 x double> %p) +; CHECK-LABEL: ceil_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frip 1, 1 +; CHECK-NEXT: frip 2, 2 +; CHECK-NEXT: blr { %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: ceil_v2f64: -; CHECK: frip -; CHECK: frip declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p) define <4 x double> @ceil_v4f64(<4 x double> %p) +; CHECK-LABEL: ceil_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: frip 1, 1 +; CHECK-NEXT: frip 2, 2 +; CHECK-NEXT: frip 3, 3 +; CHECK-NEXT: frip 4, 4 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: ceil_v4f64: -; CHECK: frip -; CHECK: frip -; CHECK: frip -; CHECK: frip declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p) define <2 x double> @trunc_v2f64(<2 x double> %p) +; CHECK-LABEL: trunc_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: friz 1, 1 +; CHECK-NEXT: friz 2, 2 +; CHECK-NEXT: blr { %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: trunc_v2f64: -; CHECK: friz -; CHECK: friz declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p) define <4 x double> @trunc_v4f64(<4 x double> %p) +; CHECK-LABEL: trunc_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: friz 1, 1 +; CHECK-NEXT: friz 2, 2 +; CHECK-NEXT: friz 3, 3 +; CHECK-NEXT: friz 4, 4 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: trunc_v4f64: -; CHECK: friz -; CHECK: friz -; CHECK: friz -; CHECK: friz declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) -define <2 x double> @nearbyint_v2f64(<2 x double> %p) +define <2 x double> @nearbyint_v2f64(<2 x double> %p) nounwind +; CHECK-LABEL: nearbyint_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -128(1) +; CHECK-NEXT: std 0, 144(1) +; CHECK-NEXT: stfd 30, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 31, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: fmr 31, 2 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 30, 1 +; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 2, 1 +; CHECK-NEXT: fmr 1, 30 +; CHECK-NEXT: lfd 31, 120(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 30, 112(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 128 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr { %t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) ret <2 x double> %t } -; CHECK-LABEL: nearbyint_v2f64: -; CHECK: bl nearbyint -; CHECK: bl nearbyint declare <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) -define <4 x double> @nearbyint_v4f64(<4 x double> %p) +define <4 x double> @nearbyint_v4f64(<4 x double> %p) nounwind +; CHECK-LABEL: nearbyint_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -144(1) +; CHECK-NEXT: std 0, 160(1) +; CHECK-NEXT: stfd 28, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: fmr 29, 2 +; CHECK-NEXT: stfd 30, 128(1) # 8-byte Folded Spill +; CHECK-NEXT: fmr 30, 3 +; CHECK-NEXT: stfd 31, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: fmr 31, 4 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 28, 1 +; CHECK-NEXT: fmr 1, 29 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 29, 1 +; CHECK-NEXT: fmr 1, 30 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 30, 1 +; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: nop +; CHECK-NEXT: fmr 4, 1 +; CHECK-NEXT: fmr 1, 28 +; CHECK-NEXT: lfd 31, 136(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 28, 112(1) # 8-byte Folded Reload +; CHECK-NEXT: fmr 2, 29 +; CHECK-NEXT: fmr 3, 30 +; CHECK-NEXT: lfd 30, 128(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 29, 120(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 144 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr { %t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p) ret <4 x double> %t } -; CHECK-LABEL: nearbyint_v4f64: -; CHECK: bl nearbyint -; CHECK: bl nearbyint -; CHECK: bl nearbyint -; CHECK: bl nearbyint declare <4 x float> @llvm.floor.v4f32(<4 x float> %p) define <4 x float> @floor_v4f32(<4 x float> %p) +; CHECK-LABEL: floor_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfim 2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: floor_v4f32: -; CHECK: vrfim declare <8 x float> @llvm.floor.v8f32(<8 x float> %p) define <8 x float> @floor_v8f32(<8 x float> %p) +; CHECK-LABEL: floor_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfim 2, 2 +; CHECK-NEXT: vrfim 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: floor_v8f32: -; CHECK: vrfim -; CHECK: vrfim declare <4 x float> @llvm.ceil.v4f32(<4 x float> %p) define <4 x float> @ceil_v4f32(<4 x float> %p) +; CHECK-LABEL: ceil_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfip 2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: ceil_v4f32: -; CHECK: vrfip declare <8 x float> @llvm.ceil.v8f32(<8 x float> %p) define <8 x float> @ceil_v8f32(<8 x float> %p) +; CHECK-LABEL: ceil_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfip 2, 2 +; CHECK-NEXT: vrfip 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: ceil_v8f32: -; CHECK: vrfip -; CHECK: vrfip declare <4 x float> @llvm.trunc.v4f32(<4 x float> %p) define <4 x float> @trunc_v4f32(<4 x float> %p) +; CHECK-LABEL: trunc_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfiz 2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: trunc_v4f32: -; CHECK: vrfiz declare <8 x float> @llvm.trunc.v8f32(<8 x float> %p) define <8 x float> @trunc_v8f32(<8 x float> %p) +; CHECK-LABEL: trunc_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfiz 2, 2 +; CHECK-NEXT: vrfiz 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: trunc_v8f32: -; CHECK: vrfiz -; CHECK: vrfiz declare <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) define <4 x float> @nearbyint_v4f32(<4 x float> %p) +; CHECK-LABEL: nearbyint_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfin 2, 2 +; CHECK-NEXT: blr { %t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p) ret <4 x float> %t } -; CHECK-LABEL: nearbyint_v4f32: -; CHECK: vrfin declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) define <8 x float> @nearbyint_v8f32(<8 x float> %p) +; CHECK-LABEL: nearbyint_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrfin 2, 2 +; CHECK-NEXT: vrfin 3, 3 +; CHECK-NEXT: blr { %t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p) ret <8 x float> %t } -; CHECK-LABEL: nearbyint_v8f32: -; CHECK: vrfin -; CHECK: vrfin From 9762ab0c3d25b311826f57f0c5b4d601dcede8bc Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: Wed, 5 Nov 2025 20:57:26 +0530 Subject: [PATCH 25/39] [MLIR][NVVM] Fix the lowering of mbarrier.test.wait (#166555) PR #165993 accidentally broke the lowering of the `test.wait` Op. This patch fixes the issue and adds tests to verify the lowering to intrinsics for all mbarrier Ops, ensuring similar regressions are caught in the future. Additionally, the `cp-async-mbarrier` test is moved to the `mbarriers.mlir` test file to keep all related tests together. Signed-off-by: Durgadoss R --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 2 +- mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir | 116 ++++++++++++++++++++ mlir/test/Target/LLVMIR/nvvmir.mlir | 13 --- 3 files changed, 117 insertions(+), 14 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 10f0cc254ea97..80bc0e5986e51 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -949,7 +949,7 @@ def NVVM_MBarrierTestWaitOp : NVVM_Op<"mbarrier.test.wait">, }]; string llvmBuilder = [{ - auto [id, args] = NVVM::MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs( + auto [id, args] = NVVM::MBarrierTestWaitOp::getIntrinsicIDAndArgs( *op, moduleTranslation, builder); $res = createIntrinsicCall(builder, id, args); }]; diff --git a/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir b/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir new file mode 100644 index 0000000000000..9bb3b082777fd --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/mbarriers.mlir @@ -0,0 +1,116 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +llvm.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.ptr) { + // CHECK-LABEL: define void @cp_async_mbarrier_arrive(ptr addrspace(3) %0, ptr %1) { + // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %1) + // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %1) + // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %0) + // CHECK-NEXT: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr + nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr + nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3> + nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3> + llvm.return +} + +llvm.func @mbarrier_init_generic(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_init_generic(ptr %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.init(ptr %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + nvvm.mbarrier.init %barrier, %count : !llvm.ptr, i32 + llvm.return +} + +llvm.func @mbarrier_init_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_init_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.init.shared(ptr addrspace(3) %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + nvvm.mbarrier.init %barrier, %count : !llvm.ptr<3>, i32 + llvm.return +} + +llvm.func @mbarrier_inval_generic(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_inval_generic(ptr %0) { + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.inval(ptr %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + nvvm.mbarrier.inval %barrier : !llvm.ptr + llvm.return +} + +llvm.func @mbarrier_inval_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_inval_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: call void @llvm.nvvm.mbarrier.inval.shared(ptr addrspace(3) %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + nvvm.mbarrier.inval %barrier : !llvm.ptr<3> + llvm.return +} + +llvm.func @mbarrier_arrive(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_arrive(ptr %0) { + // CHECK-NEXT: %2 = call i64 @llvm.nvvm.mbarrier.arrive(ptr %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr -> i64 + llvm.return +} + +llvm.func @mbarrier_arrive_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_arrive_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: %2 = call i64 @llvm.nvvm.mbarrier.arrive.shared(ptr addrspace(3) %0) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr<3> -> i64 + llvm.return +} + +llvm.func @mbarrier_arrive_nocomplete(%barrier: !llvm.ptr) { + // CHECK-LABEL: define void @mbarrier_arrive_nocomplete(ptr %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: %3 = call i64 @llvm.nvvm.mbarrier.arrive.noComplete(ptr %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + %0 = nvvm.mbarrier.arrive.nocomplete %barrier, %count : !llvm.ptr, i32 -> i64 + llvm.return +} + +llvm.func @mbarrier_arrive_nocomplete_shared(%barrier: !llvm.ptr<3>) { + // CHECK-LABEL: define void @mbarrier_arrive_nocomplete_shared(ptr addrspace(3) %0) { + // CHECK-NEXT: %2 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: %3 = call i64 @llvm.nvvm.mbarrier.arrive.noComplete.shared(ptr addrspace(3) %0, i32 %2) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + %0 = nvvm.mbarrier.arrive.nocomplete %barrier, %count : !llvm.ptr<3>, i32 -> i64 + llvm.return +} + +llvm.func @mbarrier_test_wait(%barrier: !llvm.ptr, %token : i64) -> i1 { + // CHECK-LABEL: define i1 @mbarrier_test_wait(ptr %0, i64 %1) { + // CHECK-NEXT: %3 = call i1 @llvm.nvvm.mbarrier.test.wait(ptr %0, i64 %1) + // CHECK-NEXT: ret i1 %3 + // CHECK-NEXT: } + %isComplete = nvvm.mbarrier.test.wait %barrier, %token : !llvm.ptr, i64 -> i1 + llvm.return %isComplete : i1 +} + +llvm.func @mbarrier_test_wait_shared(%barrier: !llvm.ptr<3>, %token : i64) { + // CHECK-LABEL: define void @mbarrier_test_wait_shared(ptr addrspace(3) %0, i64 %1) { + // CHECK-NEXT: %3 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + // CHECK-NEXT: %4 = call i1 @llvm.nvvm.mbarrier.test.wait.shared(ptr addrspace(3) %0, i64 %1) + // CHECK-NEXT: ret void + // CHECK-NEXT: } + %count = nvvm.read.ptx.sreg.ntid.x : i32 + %isComplete = nvvm.mbarrier.test.wait %barrier, %token : !llvm.ptr<3>, i64 -> i1 + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 3fc09f371a347..1ec55408e97a5 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -531,19 +531,6 @@ llvm.func @async_cp_zfill(%dst: !llvm.ptr<3>, %src: !llvm.ptr<1>, %cpSize: i32) llvm.return } -// CHECK-LABEL: @cp_async_mbarrier_arrive -llvm.func @cp_async_mbarrier_arrive(%bar_shared: !llvm.ptr<3>, %bar_gen: !llvm.ptr) { - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_gen : !llvm.ptr - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_gen {noinc = true} : !llvm.ptr - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_shared : !llvm.ptr<3> - // CHECK: call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %{{.*}}) - nvvm.cp.async.mbarrier.arrive %bar_shared {noinc = true} : !llvm.ptr<3> - llvm.return -} - // CHECK-LABEL: @llvm_nvvm_setmaxregister llvm.func @llvm_nvvm_setmaxregister() { // CHECK: call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 256) From 338fb02c9878761a97ed0e7e44c19d6ed8463434 Mon Sep 17 00:00:00 2001 From: Elvina Yakubova Date: Wed, 5 Nov 2025 15:28:31 +0000 Subject: [PATCH 26/39] =?UTF-8?q?[BOLT][NFC]=20Rename=20funtions=20with=20?= =?UTF-8?q?=5Fnegative=20suffix=20to=20=5Funknown=20when=20th=E2=80=A6=20(?= =?UTF-8?q?#166536)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …e size is unknown Keep _negative suffix only for test cases when the size is negative --- bolt/test/runtime/AArch64/inline-memcpy.s | 30 +++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/bolt/test/runtime/AArch64/inline-memcpy.s b/bolt/test/runtime/AArch64/inline-memcpy.s index badff299603a0..75066c855b9ed 100644 --- a/bolt/test/runtime/AArch64/inline-memcpy.s +++ b/bolt/test/runtime/AArch64/inline-memcpy.s @@ -81,14 +81,14 @@ # CHECK-ASM: bl{{.*}}: +# CHECK-ASM-LABEL: : # CHECK-ASM: bl{{.*}}: +# CHECK-ASM-LABEL: : # CHECK-ASM: bl{{.*}}: +# CHECK-ASM-LABEL: : # CHECK-ASM: bl{{.*}} Date: Wed, 5 Nov 2025 16:35:25 +0100 Subject: [PATCH 27/39] [flang] Adding NOTIFY specifier in image selector and add notify type checks (#148810) This PR adds support for the NOTIFY specifier in the image selector as described in the 2023 standard, and add checks for the NOTIFY_TYPE type. --- flang/examples/FeatureList/FeatureList.cpp | 1 + flang/include/flang/Evaluate/traverse.h | 2 +- flang/include/flang/Evaluate/variable.h | 4 +++ flang/include/flang/Parser/dump-parse-tree.h | 1 + flang/include/flang/Parser/parse-tree.h | 4 ++- flang/include/flang/Semantics/tools.h | 3 ++ flang/lib/Evaluate/variable.cpp | 13 +++++++ flang/lib/Lower/Support/Utils.cpp | 5 +-- flang/lib/Parser/Fortran-parsers.cpp | 7 ++-- flang/lib/Parser/unparse.cpp | 1 + flang/lib/Semantics/check-declarations.cpp | 13 +++++++ flang/lib/Semantics/dump-expr.cpp | 1 + flang/lib/Semantics/expression.cpp | 13 +++++++ flang/lib/Semantics/tools.cpp | 38 ++++++++++++++++++++ flang/test/Semantics/coarrays02.f90 | 17 +++++++++ flang/test/Semantics/notifywait03.f90 | 1 + 16 files changed, 118 insertions(+), 6 deletions(-) diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp index ef58da61e371b..bb55a8163d938 100644 --- a/flang/examples/FeatureList/FeatureList.cpp +++ b/flang/examples/FeatureList/FeatureList.cpp @@ -348,6 +348,7 @@ struct NodeVisitor { READ_FEATURE(TeamValue) READ_FEATURE(ImageSelector) READ_FEATURE(ImageSelectorSpec) + READ_FEATURE(ImageSelectorSpec::Notify) READ_FEATURE(ImageSelectorSpec::Stat) READ_FEATURE(ImageSelectorSpec::Team_Number) READ_FEATURE(ImplicitPart) diff --git a/flang/include/flang/Evaluate/traverse.h b/flang/include/flang/Evaluate/traverse.h index 48aafa8982559..d63c16f93230a 100644 --- a/flang/include/flang/Evaluate/traverse.h +++ b/flang/include/flang/Evaluate/traverse.h @@ -146,7 +146,7 @@ class Traverse { return Combine(x.base(), x.subscript()); } Result operator()(const CoarrayRef &x) const { - return Combine(x.base(), x.cosubscript(), x.stat(), x.team()); + return Combine(x.base(), x.cosubscript(), x.notify(), x.stat(), x.team()); } Result operator()(const DataRef &x) const { return visitor_(x.u); } Result operator()(const Substring &x) const { diff --git a/flang/include/flang/Evaluate/variable.h b/flang/include/flang/Evaluate/variable.h index 5c14421fd3a1b..4f64ede3d407d 100644 --- a/flang/include/flang/Evaluate/variable.h +++ b/flang/include/flang/Evaluate/variable.h @@ -260,6 +260,9 @@ class CoarrayRef { // it's TEAM=. std::optional> team() const; CoarrayRef &set_team(Expr &&); + // When notify() is Expr, it's NOTIFY=. + std::optional> notify() const; + CoarrayRef &set_notify(Expr &&); int Rank() const; int Corank() const { return 0; } @@ -272,6 +275,7 @@ class CoarrayRef { private: common::CopyableIndirection base_; std::vector> cosubscript_; + std::optional>> notify_; std::optional>> stat_; std::optional>> team_; }; diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index de2716410d6cd..b2424023b0168 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -387,6 +387,7 @@ class ParseTreeDumper { NODE(parser, TeamValue) NODE(parser, ImageSelector) NODE(parser, ImageSelectorSpec) + NODE(ImageSelectorSpec, Notify) NODE(ImageSelectorSpec, Stat) NODE(ImageSelectorSpec, Team_Number) NODE(parser, ImplicitPart) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 8c7578f7a1941..32e444fbb2e6c 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -1684,13 +1684,15 @@ using Cosubscript = ScalarIntExpr; WRAPPER_CLASS(TeamValue, Scalar>); // R926 image-selector-spec -> +// NOTIFY = notify-variable | // STAT = stat-variable | TEAM = team-value | // TEAM_NUMBER = scalar-int-expr struct ImageSelectorSpec { WRAPPER_CLASS(Stat, Scalar>>); WRAPPER_CLASS(Team_Number, ScalarIntExpr); + WRAPPER_CLASS(Notify, Scalar>); UNION_CLASS_BOILERPLATE(ImageSelectorSpec); - std::variant u; + std::variant u; }; // R924 image-selector -> diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 8a7b9867c0979..1c3477013b559 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -107,6 +107,7 @@ bool IsBindCProcedure(const Scope &); // Returns a pointer to the function's symbol when true, else null const Symbol *IsFunctionResultWithSameNameAsFunction(const Symbol &); bool IsOrContainsEventOrLockComponent(const Symbol &); +bool IsOrContainsNotifyComponent(const Symbol &); bool CanBeTypeBoundProc(const Symbol &); // Does a non-PARAMETER symbol have explicit initialization with =value or // =>target in its declaration (but not in a DATA statement)? (Being @@ -652,6 +653,8 @@ using PotentialAndPointerComponentIterator = // dereferenced. PotentialComponentIterator::const_iterator FindEventOrLockPotentialComponent( const DerivedTypeSpec &, bool ignoreCoarrays = false); +PotentialComponentIterator::const_iterator FindNotifyPotentialComponent( + const DerivedTypeSpec &, bool ignoreCoarrays = false); PotentialComponentIterator::const_iterator FindCoarrayPotentialComponent( const DerivedTypeSpec &); PotentialAndPointerComponentIterator::const_iterator diff --git a/flang/lib/Evaluate/variable.cpp b/flang/lib/Evaluate/variable.cpp index b9b34d4d5bc89..b257dad42fc58 100644 --- a/flang/lib/Evaluate/variable.cpp +++ b/flang/lib/Evaluate/variable.cpp @@ -89,6 +89,14 @@ std::optional> CoarrayRef::team() const { } } +std::optional> CoarrayRef::notify() const { + if (notify_) { + return notify_.value().value(); + } else { + return std::nullopt; + } +} + CoarrayRef &CoarrayRef::set_stat(Expr &&v) { CHECK(IsVariable(v)); stat_.emplace(std::move(v)); @@ -100,6 +108,11 @@ CoarrayRef &CoarrayRef::set_team(Expr &&v) { return *this; } +CoarrayRef &CoarrayRef::set_notify(Expr &&v) { + notify_.emplace(std::move(v)); + return *this; +} + const Symbol &CoarrayRef::GetFirstSymbol() const { return base().GetFirstSymbol(); } diff --git a/flang/lib/Lower/Support/Utils.cpp b/flang/lib/Lower/Support/Utils.cpp index 1b4d37e9798a9..4b95a3adf052a 100644 --- a/flang/lib/Lower/Support/Utils.cpp +++ b/flang/lib/Lower/Support/Utils.cpp @@ -82,7 +82,7 @@ class HashEvaluateExpr { x.cosubscript()) cosubs -= getHashValue(v); return getHashValue(x.base()) * 97u - cosubs + getHashValue(x.stat()) + - 257u + getHashValue(x.team()); + 257u + getHashValue(x.team()) + getHashValue(x.notify()); } static unsigned getHashValue(const Fortran::evaluate::NamedEntity &x) { if (x.IsSymbol()) @@ -341,7 +341,8 @@ class IsEqualEvaluateExpr { const Fortran::evaluate::CoarrayRef &y) { return isEqual(x.base(), y.base()) && isEqual(x.cosubscript(), y.cosubscript()) && - isEqual(x.stat(), y.stat()) && isEqual(x.team(), y.team()); + isEqual(x.stat(), y.stat()) && isEqual(x.team(), y.team()) && + isEqual(x.notify(), y.notify()); } static bool isEqual(const Fortran::evaluate::NamedEntity &x, const Fortran::evaluate::NamedEntity &y) { diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp index 59fe7d813d96a..ea6a1eada2741 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -1212,12 +1212,15 @@ TYPE_CONTEXT_PARSER("image selector"_en_US, // R926 image-selector-spec -> // STAT = stat-variable | TEAM = team-value | -// TEAM_NUMBER = scalar-int-expr +// TEAM_NUMBER = scalar-int-expr | +// NOTIFY = notify-variable TYPE_PARSER(construct(construct( "STAT =" >> scalar(integer(indirect(variable))))) || construct(construct("TEAM =" >> teamValue)) || construct(construct( - "TEAM_NUMBER =" >> scalarIntExpr))) + "TEAM_NUMBER =" >> scalarIntExpr)) || + construct(construct( + "NOTIFY =" >> scalar(indirect(variable))))) // R927 allocate-stmt -> // ALLOCATE ( [type-spec ::] allocation-list [, alloc-opt-list] ) diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 84123030195e9..6bb14a43e7b99 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -819,6 +819,7 @@ class UnparseVisitor { Word("TEAM="); } } + void Before(const ImageSelectorSpec::Notify &) { Word("NOTIFY="); } void Unparse(const AllocateStmt &x) { // R927 Word("ALLOCATE("); Walk(std::get>(x.t), "::"); diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index de407d3b1e125..9a6b3ff3cdc2c 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -855,6 +855,15 @@ void CheckHelper::CheckObjectEntity( messages_.Say( "Variable '%s' with EVENT_TYPE or LOCK_TYPE potential component '%s' must be a coarray"_err_en_US, symbol.name(), component.BuildResultDesignatorName()); + } else if (IsNotifyType(derived)) { // C1612 + messages_.Say( + "Variable '%s' with NOTIFY_TYPE must be a coarray"_err_en_US, + symbol.name()); + } else if (auto component{FindNotifyPotentialComponent( // C1611 + *derived, /*ignoreCoarrays=*/true)}) { + messages_.Say( + "Variable '%s' with NOTIFY_TYPE potential component '%s' must be a coarray"_err_en_US, + symbol.name(), component.BuildResultDesignatorName()); } } } @@ -873,6 +882,10 @@ void CheckHelper::CheckObjectEntity( messages_.Say( "An INTENT(OUT) dummy argument may not be, or contain, EVENT_TYPE or LOCK_TYPE"_err_en_US); } + if (IsOrContainsNotifyComponent(symbol)) { // C1613 + messages_.Say( + "An INTENT(OUT) dummy argument may not be, or contain, NOTIFY_TYPE"_err_en_US); + } if (IsAssumedSizeArray(symbol)) { // C834 if (type && type->IsPolymorphic()) { messages_.Say( diff --git a/flang/lib/Semantics/dump-expr.cpp b/flang/lib/Semantics/dump-expr.cpp index 66cedab94bfb4..8d354cf65b61e 100644 --- a/flang/lib/Semantics/dump-expr.cpp +++ b/flang/lib/Semantics/dump-expr.cpp @@ -23,6 +23,7 @@ void DumpEvaluateExpr::Show(const evaluate::CoarrayRef &x) { Indent("coarray ref"); Show(x.base()); Show(x.cosubscript()); + Show(x.notify()); Show(x.stat()); Show(x.team()); Outdent(); diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index c8167fd34f666..ac58dfc005f17 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -1579,6 +1579,19 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::CoindexedNamedObject &x) { std::get>(x.imageSelector.t)) { common::visit( common::visitors{ + [&](const parser::ImageSelectorSpec::Notify &x) { + Analyze(x.v); + if (const auto *expr{GetExpr(context_, x.v)}) { + if (coarrayRef.notify()) { + Say("coindexed reference has multiple NOTIFY= specifiers"_err_en_US); + } else if (auto dyType{expr->GetType()}; + dyType && IsNotifyType(GetDerivedTypeSpec(*dyType))) { + coarrayRef.set_notify(Expr{*expr}); + } else { + Say("NOTIFY= specifier must have type NOTIFY_TYPE from ISO_FORTRAN_ENV"_err_en_US); + } + } + }, [&](const parser::ImageSelectorSpec::Stat &x) { Analyze(x.v); if (const auto *expr{GetExpr(context_, x.v)}) { diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 8eddd03faa962..cf1e5e7d44565 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -582,6 +582,18 @@ bool IsOrContainsEventOrLockComponent(const Symbol &original) { return false; } +bool IsOrContainsNotifyComponent(const Symbol &original) { + const Symbol &symbol{ResolveAssociations(original, /*stopAtTypeGuard=*/true)}; + if (evaluate::IsVariable(symbol)) { + if (const DeclTypeSpec *type{symbol.GetType()}) { + if (const DerivedTypeSpec *derived{type->AsDerived()}) { + return IsNotifyType(derived) || FindNotifyPotentialComponent(*derived); + } + } + } + return false; +} + // Check this symbol suitable as a type-bound procedure - C769 bool CanBeTypeBoundProc(const Symbol &symbol) { if (IsDummy(symbol) || IsProcedurePointer(symbol)) { @@ -1489,6 +1501,32 @@ PotentialComponentIterator::const_iterator FindEventOrLockPotentialComponent( return iter; } +PotentialComponentIterator::const_iterator FindNotifyPotentialComponent( + const DerivedTypeSpec &derived, bool ignoreCoarrays) { + PotentialComponentIterator potentials{derived}; + auto iter{potentials.begin()}; + for (auto end{potentials.end()}; iter != end; ++iter) { + const Symbol &component{*iter}; + if (const auto *object{component.detailsIf()}) { + if (const DeclTypeSpec *type{object->type()}) { + if (IsNotifyType(type->AsDerived())) { + if (!ignoreCoarrays) { + break; // found one + } + auto path{iter.GetComponentPath()}; + path.pop_back(); + if (std::find_if(path.begin(), path.end(), [](const Symbol &sym) { + return evaluate::IsCoarray(sym); + }) == path.end()) { + break; // found one not in a coarray + } + } + } + } + } + return iter; +} + UltimateComponentIterator::const_iterator FindAllocatableUltimateComponent( const DerivedTypeSpec &derived) { UltimateComponentIterator ultimates{derived}; diff --git a/flang/test/Semantics/coarrays02.f90 b/flang/test/Semantics/coarrays02.f90 index b16e0ccb58797..e866dd89c07ab 100644 --- a/flang/test/Semantics/coarrays02.f90 +++ b/flang/test/Semantics/coarrays02.f90 @@ -16,6 +16,8 @@ program main type(event_type) event !ERROR: Variable 'lock' with EVENT_TYPE or LOCK_TYPE must be a coarray type(lock_type) lock + !ERROR: Variable 'notify' with NOTIFY_TYPE must be a coarray + type(notify_type) notify integer :: local[*] ! ok in main end @@ -120,3 +122,18 @@ subroutine s4 !ERROR: Subscripts must appear in a coindexed reference when its base is an array print *, ta(1)%a[1] end + +subroutine s5(a, notify, res) + use iso_fortran_env + type t + type(notify_type) :: a + end type + real, intent(in) :: a[*] + type(event_type), intent(in) :: notify[*] + !ERROR: An INTENT(OUT) dummy argument may not be, or contain, NOTIFY_TYPE + type(notify_type), intent(out) :: res[*] + !ERROR: Variable 'bad' with NOTIFY_TYPE potential component '%a' must be a coarray + type(t) :: bad + !ERROR: NOTIFY= specifier must have type NOTIFY_TYPE from ISO_FORTRAN_ENV + print *, a[1, NOTIFY=notify] +end diff --git a/flang/test/Semantics/notifywait03.f90 b/flang/test/Semantics/notifywait03.f90 index 0fc56f66ad32d..a336a7a67669a 100644 --- a/flang/test/Semantics/notifywait03.f90 +++ b/flang/test/Semantics/notifywait03.f90 @@ -10,6 +10,7 @@ program test_notify_wait implicit none ! notify_type variables must be coarrays + !ERROR: Variable 'non_coarray' with NOTIFY_TYPE must be a coarray type(notify_type) :: non_coarray type(notify_type) :: notify_var[*], notify_array(2)[*] From 52cb6e9d49f836b624bd0536734afd7aa4194ca0 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 5 Nov 2025 07:40:37 -0800 Subject: [PATCH 28/39] [ProfCheck][NFC] Make Function argument from branch weight setter optional (#166032) This picks up from #166028, making the `Function` argument optional: most cases don't need to provide it, but in e.g. InstCombine's case, where the instruction (select, branch) is not attached to a function yet, the function needs to be passed explicitly. Co-authored-by: Florian Hahn --- llvm/include/llvm/IR/ProfDataUtils.h | 9 +++++---- llvm/lib/IR/IRBuilder.cpp | 3 +-- llvm/lib/IR/ProfDataUtils.cpp | 9 ++++++--- .../AggressiveInstCombine/AggressiveInstCombine.cpp | 3 +-- llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 2 +- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 3 +-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 3 +-- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index a0876b169e0b8..a7bcbf010d1bf 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -194,10 +194,11 @@ LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I, /// Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch /// weights in the new instruction if the parent function of the original /// instruction has an entry count. This is to not confuse users by injecting -/// profile data into non-profiled functions. -LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, - Function &F, - StringRef PassName); +/// profile data into non-profiled functions. If \p F is nullptr, we will fetch +/// the function from \p I. +LLVM_ABI void +setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, + const Function *F = nullptr); /// Analogous to setExplicitlyUnknownBranchWeights, but for functions and their /// entry counts. diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 88dbd176e0d3f..95edb2e8e56d8 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -1019,8 +1019,7 @@ Value *IRBuilderBase::CreateSelectWithUnknownProfile(Value *C, Value *True, const Twine &Name) { Value *Ret = CreateSelectFMF(C, True, False, {}, Name); if (auto *SI = dyn_cast(Ret)) { - setExplicitlyUnknownBranchWeightsIfProfiled( - *SI, *SI->getParent()->getParent(), PassName); + setExplicitlyUnknownBranchWeightsIfProfiled(*SI, PassName); } return Ret; } diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index fc2be5188f456..94dbe1f3988b8 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -274,9 +274,12 @@ void llvm::setExplicitlyUnknownBranchWeights(Instruction &I, } void llvm::setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, - Function &F, - StringRef PassName) { - if (std::optional EC = F.getEntryCount(); + StringRef PassName, + const Function *F) { + F = F ? F : I.getFunction(); + assert(F && "Either pass a instruction attached to a Function, or explicitly " + "pass the Function that it will be attached to"); + if (std::optional EC = F->getEntryCount(); EC && EC->getCount() > 0) setExplicitlyUnknownBranchWeights(I, PassName); } diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 7a95df4b2a47c..b575d76e897d2 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -1378,8 +1378,7 @@ static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU, IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N); // We can't know the precise weights here, as they would depend on the value // distribution of Call->getArgOperand(1). So we just mark it as "unknown". - setExplicitlyUnknownBranchWeightsIfProfiled(*SI, *Call->getFunction(), - DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*SI, DEBUG_TYPE); Type *IndexTy = DL.getIndexType(Call->getType()); SmallVector Updates; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index d85e4f7590197..9bdd8cb71f7f3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -479,7 +479,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final const Twine &NameStr = "", InsertPosition InsertBefore = nullptr) { auto *Sel = SelectInst::Create(C, S1, S2, NameStr, InsertBefore, nullptr); - setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, F, DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*Sel, DEBUG_TYPE, &F); return Sel; } diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 0577ddbd2353c..7930b38174e49 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -330,8 +330,7 @@ static void buildPartialUnswitchConditionalBranch( HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof) : nullptr); if (!HasBranchWeights) - setExplicitlyUnknownBranchWeightsIfProfiled( - *BR, *BR->getParent()->getParent(), DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE); } /// Copy a set of loop invariant values, and conditionally branch on them. diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3a3e3ade20212..9a8dbebe5bfba 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5214,8 +5214,7 @@ bool SimplifyCFGOpt::simplifyBranchOnICmpChain(BranchInst *BI, // We don't have any info about this condition. auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB) : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); - setExplicitlyUnknownBranchWeightsIfProfiled(*Br, *NewBB->getParent(), - DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*Br, DEBUG_TYPE); OldTI->eraseFromParent(); From 4334b43c6593f3839d33806131fd36c620390cbe Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 5 Nov 2025 10:43:56 -0500 Subject: [PATCH 29/39] [gn] port bb4ed55acdbc --- .../source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn | 10 ++++++++-- llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn index 9848efef70568..fa99fa8649caf 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/LanguageRuntime/CPlusPlus/BUILD.gn @@ -1,10 +1,16 @@ static_library("CPlusPlus") { output_name = "lldbPluginCPPRuntime" - configs += [ "//llvm/utils/gn/build:lldb_code" ] + configs += [ + "//llvm/utils/gn/build:clang_code", + "//llvm/utils/gn/build:lldb_code", + ] deps = [ "//lldb/source/Core", "//lldb/source/Symbol", "//lldb/source/Target", ] - sources = [ "CPPLanguageRuntime.cpp" ] + sources = [ + "CPPLanguageRuntime.cpp", + "VerboseTrapFrameRecognizer.cpp", + ] } diff --git a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn index a863baf912051..783eb96283596 100644 --- a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn @@ -105,6 +105,5 @@ static_library("Target") { "UnixSignals.cpp", "UnwindAssembly.cpp", "UnwindLLDB.cpp", - "VerboseTrapFrameRecognizer.cpp", ] } From fa6cc7eadedd61347456f088ef40f6dfdcd751e2 Mon Sep 17 00:00:00 2001 From: Doug Wyatt Date: Wed, 5 Nov 2025 07:45:33 -0800 Subject: [PATCH 30/39] [Clang] FunctionEffects: ignore (methods of) local CXXRecordDecls. (#166078) In the following example, `Functor::method()` inappropriately triggers a diagnostic that `outer()` is blocking by allocating memory. ``` void outer() [[clang::nonblocking]] { struct Functor { int* ptr; void method() { ptr = new int; } }; } ``` --------- Co-authored-by: Doug Wyatt --- clang/lib/Sema/SemaFunctionEffects.cpp | 8 ++++++++ .../Sema/attr-nonblocking-constraints.cpp | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/clang/lib/Sema/SemaFunctionEffects.cpp b/clang/lib/Sema/SemaFunctionEffects.cpp index 4b63eb7df1054..12cc02965e7d3 100644 --- a/clang/lib/Sema/SemaFunctionEffects.cpp +++ b/clang/lib/Sema/SemaFunctionEffects.cpp @@ -1302,6 +1302,14 @@ class Analyzer { return true; } + bool TraverseCXXRecordDecl(CXXRecordDecl *D) override { + // Completely skip local struct/class/union declarations since their + // methods would otherwise be incorrectly interpreted as part of the + // function we are currently traversing. The initial Sema pass will have + // already recorded any nonblocking methods needing analysis. + return true; + } + bool TraverseConstructorInitializer(CXXCtorInitializer *Init) override { ViolationSite PrevVS = VSite; if (Init->isAnyMemberInitializer()) diff --git a/clang/test/Sema/attr-nonblocking-constraints.cpp b/clang/test/Sema/attr-nonblocking-constraints.cpp index 881e816292d59..012c017798a1f 100644 --- a/clang/test/Sema/attr-nonblocking-constraints.cpp +++ b/clang/test/Sema/attr-nonblocking-constraints.cpp @@ -104,6 +104,25 @@ void nb8c() }; } +void nb8d() [[clang::nonblocking]] +{ + // Blocking methods of a local CXXRecordDecl do not generate diagnostics + // for the outer function. + struct F1 { + void method() { void* ptr = new int; } + }; + + // Skipping the CXXRecordDecl does not skip a following VarDecl. + struct F2 { + F2() { void* ptr = new int; } // expected-note {{constructor cannot be inferred 'nonblocking' because it allocates or deallocates memory}} + } f2; // expected-warning {{function with 'nonblocking' attribute must not call non-'nonblocking' constructor 'nb8d()::F2::F2'}} + + // Nonblocking methods of a local CXXRecordDecl are verified independently. + struct F3 { + void method() [[clang::nonblocking]] { void* ptr = new int; }// expected-warning {{function with 'nonblocking' attribute must not allocate or deallocate memory}} + }; +} + // Make sure template expansions are found and verified. template struct Adder { From d568601d5a0c7c315d8038f4dc24e0ccd97a1ba0 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 5 Nov 2025 07:46:12 -0800 Subject: [PATCH 31/39] [NFC][TableGen] Adopt NamespaceEmitter in DirectiveEmitter (#165600) --- llvm/test/TableGen/directive1.td | 7 ++-- llvm/test/TableGen/directive2.td | 7 ++-- .../utils/TableGen/Basic/DirectiveEmitter.cpp | 36 ++++++------------- 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 8648651f3d714..5bd7890e0ddd1 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -186,8 +186,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // IMPL: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: -// IMPL-NEXT: namespace llvm { -// IMPL-NEXT: namespace tdl { +// IMPL-NEXT: namespace llvm::tdl { // IMPL-EMPTY: // IMPL-NEXT: // Sets for dira // IMPL-EMPTY: @@ -204,8 +203,8 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // IMPL-EMPTY: // IMPL-NEXT: static requiredClauses_TDLD_dira { // IMPL-NEXT: }; -// IMPL-NEXT: } // namespace tdl -// IMPL-NEXT: } // namespace llvm +// IMPL-EMPTY: +// IMPL-NEXT: } // namespace llvm::tdl // IMPL-EMPTY: // IMPL-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td index 96022d7647440..eaaf82ddaaf41 100644 --- a/llvm/test/TableGen/directive2.td +++ b/llvm/test/TableGen/directive2.td @@ -159,8 +159,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // IMPL: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: -// IMPL-NEXT: namespace llvm { -// IMPL-NEXT: namespace tdl { +// IMPL-NEXT: namespace llvm::tdl { // IMPL-EMPTY: // IMPL-NEXT: // Sets for dira // IMPL-EMPTY: @@ -177,8 +176,8 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // IMPL-EMPTY: // IMPL-NEXT: static requiredClauses_TDLD_dira { // IMPL-NEXT: }; -// IMPL-NEXT: } // namespace tdl -// IMPL-NEXT: } // namespace llvm +// IMPL-EMPTY: +// IMPL-NEXT: } // namespace llvm::tdl // IMPL-EMPTY: // IMPL-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS // IMPL-EMPTY: diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp index d33bf45595e2e..0bb743dc8a7f5 100644 --- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp @@ -359,7 +359,6 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { OS << " static constexpr bool is_iterable = true;\n"; OS << "};\n"; } - LlvmNS.close(); } // Given a list of spellings (for a given clause/directive), order them @@ -931,27 +930,20 @@ static void generateClauseSet(ArrayRef VerClauses, // Generate an enum set for the 4 kinds of clauses linked to a directive. static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang, Frontend FE, raw_ostream &OS) { + IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() + + "_DIRECTIVE_CLAUSE_SETS"); - std::string IfDefName{"GEN_"}; - IfDefName += getFESpelling(FE).upper(); - IfDefName += "_DIRECTIVE_CLAUSE_SETS"; - IfDefEmitter Scope(OS, IfDefName); - - StringRef Namespace = - getFESpelling(FE == Frontend::Flang ? Frontend::LLVM : FE); + std::string Namespace = + getFESpelling(FE == Frontend::Flang ? Frontend::LLVM : FE).str(); // The namespace has to be different for clang vs flang, as 2 structs with the // same name but different layout is UB. So just put the 'clang' on in the // clang namespace. - OS << "namespace " << Namespace << " {\n"; - - // Open namespaces defined in the directive language. - SmallVector Namespaces; - SplitString(DirLang.getCppNamespace(), Namespaces, "::"); - for (auto Ns : Namespaces) - OS << "namespace " << Ns << " {\n"; + // Additionally, open namespaces defined in the directive language. + if (!DirLang.getCppNamespace().empty()) + Namespace += "::" + DirLang.getCppNamespace().str(); + NamespaceEmitter NS(OS, Namespace); for (const Directive Dir : DirLang.getDirectives()) { - OS << "\n"; OS << "// Sets for " << Dir.getSpellingForIdentifier() << "\n"; generateClauseSet(Dir.getAllowedClauses(), OS, "allowedClauses_", Dir, @@ -963,12 +955,6 @@ static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang, generateClauseSet(Dir.getRequiredClauses(), OS, "requiredClauses_", Dir, DirLang, FE); } - - // Closing namespaces - for (auto Ns : reverse(Namespaces)) - OS << "} // namespace " << Ns << "\n"; - - OS << "} // namespace " << Namespace << "\n"; } // Generate a map of directive (key) with DirectiveClauses struct as values. @@ -976,10 +962,8 @@ static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang, // allowances (allowed, allowed once, allowed exclusive and required). static void generateDirectiveClauseMap(const DirectiveLanguage &DirLang, Frontend FE, raw_ostream &OS) { - std::string IfDefName{"GEN_"}; - IfDefName += getFESpelling(FE).upper(); - IfDefName += "_DIRECTIVE_CLAUSE_MAP"; - IfDefEmitter Scope(OS, IfDefName); + IfDefEmitter Scope(OS, "GEN_" + getFESpelling(FE).upper() + + "_DIRECTIVE_CLAUSE_MAP"); OS << "{\n"; From 3641e269b0fdf3614b6a2a068678d8431204a489 Mon Sep 17 00:00:00 2001 From: nerix Date: Wed, 5 Nov 2025 16:48:24 +0100 Subject: [PATCH 32/39] [MsDemangle] Read entire chain of target names in special tables (#155630) When there's a deep inheritance hierarchy of multiple C++ classes (see below), then the mangled name of a VFTable can include multiple key nodes in the target name. For example, in the following code, MSVC will generate mangled names for the VFTables that have up to three key classes in the context.
Code ```cpp class Base1 { virtual void a() {}; }; class Base2 { virtual void b() {} }; class Ind1 : public Base1 {}; class Ind2 : public Base1 {}; class A : public Ind1, public Ind2 {}; class Ind3 : public A {}; class Ind4 : public A {}; class B : public Ind3, public Ind4 {}; class Ind5 : public B {}; class Ind6 : public B {}; class C : public Ind5, public Ind6 {}; int main() { auto i = new C; } ```
This will include `??_7C@@6BInd1@@Ind4@@Ind5@@@` (and every other combination). Microsoft's undname will demangle this to "const C::\`vftable'{for \`Ind1's \`Ind4's \`Ind5'}". Previously, LLVM would demangle this to "const C::\`vftable'{for \`Ind1'}". With this PR, the output of LLVM's undname will be identical to Microsoft's version. This changes `SpecialTableSymbolNode::TargetName` to a node array which contains each key from the name. Unlike namespaces, these keys are not in reverse order - they are in the same order as in the mangled name. --- .../llvm/Demangle/MicrosoftDemangleNodes.h | 2 +- llvm/lib/Demangle/MicrosoftDemangle.cpp | 26 +++++++++++++++++-- llvm/lib/Demangle/MicrosoftDemangleNodes.cpp | 4 +-- llvm/test/Demangle/ms-operators.test | 15 +++++++++++ 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h index 155cfe8dd3a98..711aa70a4a8d3 100644 --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h @@ -708,7 +708,7 @@ struct DEMANGLE_ABI SpecialTableSymbolNode : public SymbolNode { return N->kind() == NodeKind::SpecialTableSymbol; } - QualifiedNameNode *TargetName = nullptr; + NodeArrayNode *TargetNames = nullptr; Qualifiers Quals = Qualifiers::Q_None; }; diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp index b22928be3be50..250d382998982 100644 --- a/llvm/lib/Demangle/MicrosoftDemangle.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp @@ -15,6 +15,8 @@ #include "llvm/Demangle/MicrosoftDemangle.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Demangle/DemangleConfig.h" #include "llvm/Demangle/MicrosoftDemangleNodes.h" @@ -277,6 +279,15 @@ demanglePointerCVQualifiers(std::string_view &MangledName) { DEMANGLE_UNREACHABLE; } +static NodeArrayNode *smallVecToNodeArray(ArenaAllocator &Arena, + ArrayRef Vec) { + NodeArrayNode *Arr = Arena.alloc(); + Arr->Count = Vec.size(); + Arr->Nodes = Arena.allocArray(Vec.size()); + std::memcpy(Arr->Nodes, Vec.data(), Vec.size() * sizeof(Node *)); + return Arr; +} + std::string_view Demangler::copyString(std::string_view Borrowed) { char *Stable = Arena.allocUnalignedBuffer(Borrowed.size()); // This is not a micro-optimization, it avoids UB, should Borrowed be an null @@ -323,8 +334,19 @@ Demangler::demangleSpecialTableSymbolNode(std::string_view &MangledName, } std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName); - if (!consumeFront(MangledName, '@')) - STSN->TargetName = demangleFullyQualifiedTypeName(MangledName); + + SmallVector TargetNames; + while (!consumeFront(MangledName, '@')) { + QualifiedNameNode *QN = demangleFullyQualifiedTypeName(MangledName); + if (Error) + return nullptr; + assert(QN); + TargetNames.push_back(QN); + } + + if (!TargetNames.empty()) + STSN->TargetNames = smallVecToNodeArray(Arena, TargetNames); + return STSN; } diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp index 61e4961c714bc..17c6aab500049 100644 --- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp @@ -662,9 +662,9 @@ void VcallThunkIdentifierNode::output(OutputBuffer &OB, void SpecialTableSymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const { outputQualifiers(OB, Quals, false, true); Name->output(OB, Flags); - if (TargetName) { + if (TargetNames) { OB << "{for `"; - TargetName->output(OB, Flags); + TargetNames->output(OB, Flags, "'s `"); OB << "'}"; } } diff --git a/llvm/test/Demangle/ms-operators.test b/llvm/test/Demangle/ms-operators.test index b940488786631..cafa1ae3c0663 100644 --- a/llvm/test/Demangle/ms-operators.test +++ b/llvm/test/Demangle/ms-operators.test @@ -143,9 +143,24 @@ ??_7A@B@@6BC@D@@@ ; CHECK: const B::A::`vftable'{for `D::C'} +??_7A@B@@6BC@D@@E@F@@@ +; CHECK: const B::A::`vftable'{for `D::C's `F::E'} + +??_7A@B@@6BC@D@@E@F@@G@H@@@ +; CHECK: const B::A::`vftable'{for `D::C's `F::E's `H::G'} + ??_8Middle2@@7B@ ; CHECK: const Middle2::`vbtable' +??_7A@@6BB@@@ +; CHECK: const A::`vftable'{for `B'} + +??_7A@@6BB@@C@@@ +; CHECK: const A::`vftable'{for `B's `C'} + +??_7A@@6BB@@C@@D@@@ +; CHECK: const A::`vftable'{for `B's `C's `D'} + ??_9Base@@$B7AA ; CHECK: [thunk]: __cdecl Base::`vcall'{8, {flat}} From ff108f7486fafb13f330cec324f59a04442d01d4 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 5 Nov 2025 08:02:55 -0800 Subject: [PATCH 33/39] Fix failures introduced in #166032 (#166574) --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 2 +- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 6412949948c07..0b55c03a46747 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1301,7 +1301,7 @@ Value *AtomicExpandImpl::insertRMWLLSCLoop( // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is // hard to predict precise branch weigths we mark the branch as "unknown" // (50/50) to prevent misleading optimizations. - setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, *F, DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, DEBUG_TYPE); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); return Loaded; diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 7930b38174e49..0f3e66476f055 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -388,8 +388,7 @@ static void buildPartialInvariantUnswitchConditionalBranch( IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, Direction ? &NormalSucc : &UnswitchedSucc, ProfData); if (!ProfData) - setExplicitlyUnknownBranchWeightsIfProfiled(*BR, *BR->getFunction(), - DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE); } /// Rewrite the PHI nodes in an unswitched loop exit basic block. @@ -3203,8 +3202,7 @@ injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, auto *InvariantBr = Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock); // We don't know anything about the relation between the limits. - setExplicitlyUnknownBranchWeightsIfProfiled( - *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE); + setExplicitlyUnknownBranchWeightsIfProfiled(*InvariantBr, DEBUG_TYPE); Builder.SetInsertPoint(CheckBlock); Builder.CreateCondBr( From a796d1836930bc721686a728852b820a2ea4de92 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 16:03:31 +0000 Subject: [PATCH 34/39] [gn build] Port 370058777be2 --- llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn index 1a890f6733597..a234d2be67f66 100644 --- a/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/BinaryFormat/BUILD.gn @@ -12,7 +12,6 @@ static_library("BinaryFormat") { "ELF.cpp", "MachO.cpp", "Magic.cpp", - "Minidump.cpp", "MsgPackDocument.cpp", "MsgPackDocumentYAML.cpp", "MsgPackReader.cpp", From ef6947b098e8086ac0d72086b1c63cb8d82ba797 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 16:03:32 +0000 Subject: [PATCH 35/39] [gn build] Port 3ebed51e997b --- llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn index 065fc6cdd74a3..bd8d9610c2a4a 100644 --- a/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/CodeGen/BUILD.gn @@ -17,6 +17,7 @@ unittest("ClangCodeGenTests") { "BufferSourceTest.cpp", "CheckTargetFeaturesTest.cpp", "CodeGenExternalTest.cpp", + "DemangleTrapReasonInDebugInfo.cpp", "TBAAMetadataTest.cpp", ] } From 9bb67f88ed287ebf386ed7579d05abd5d214e52a Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 16:03:33 +0000 Subject: [PATCH 36/39] [gn build] Port 51d0f6d6e172 --- .../gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn | 1 + .../gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn index 2f84999621e1b..3c3fdf7e16885 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn @@ -82,6 +82,7 @@ static_library("bugprone") { "SmartPtrArrayMismatchCheck.cpp", "SpuriouslyWakeUpFunctionsCheck.cpp", "StandaloneEmptyCheck.cpp", + "StdNamespaceModificationCheck.cpp", "StringConstructorCheck.cpp", "StringIntegerAssignmentCheck.cpp", "StringLiteralWithEmbeddedNulCheck.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn index ec642b6afad66..1eae289143b5b 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cert/BUILD.gn @@ -16,7 +16,6 @@ static_library("cert") { ] sources = [ "CERTTidyModule.cpp", - "DontModifyStdNamespaceCheck.cpp", "FloatLoopCounter.cpp", "LimitedRandomnessCheck.cpp", "MutatingCopyCheck.cpp", From 3bf0ce15f80ddffe0fff1a52500c79b9bcd011e4 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 16:03:34 +0000 Subject: [PATCH 37/39] [gn build] Port 718a3b268fcf --- llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn index 393309ee39bfe..a261f2866be47 100644 --- a/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/lib/Passes/BUILD.gn @@ -12,7 +12,7 @@ static_library("Passes") { "//llvm/utils/gn/build/libs/pthread", ] sources = [ - "ADRRelaxationPass.cpp", + "AArch64RelaxationPass.cpp", "Aligner.cpp", "AllocCombiner.cpp", "AsmDump.cpp", From ce5dac67ffddfa9b9f141b35a4bf05bdee970676 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 5 Nov 2025 16:03:35 +0000 Subject: [PATCH 38/39] [gn build] Port dd14eb8242d7 --- llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index a1f5b475e2096..ad72c0069237d 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -151,6 +151,7 @@ static_library("LLVMRISCVCodeGen") { "RISCVMoveMerger.cpp", "RISCVOptWInstrs.cpp", "RISCVPostRAExpandPseudoInsts.cpp", + "RISCVPromoteConstant.cpp", "RISCVPushPopOptimizer.cpp", "RISCVRedundantCopyElimination.cpp", "RISCVRegisterInfo.cpp", From 19291b778cff940252decf96a00cd18fdb5c3114 Mon Sep 17 00:00:00 2001 From: Jack Styles Date: Wed, 5 Nov 2025 14:25:07 +0000 Subject: [PATCH 39/39] [Flang][OpenMP] Add Lowering support for taskloop reductions Support for lowering the Reduction clause support already exists, so we can extend the support for taskloop to include reduction. As support for Reduction in taskloop was only added in OpenMP 5.0, the use of the Clause has been restricted to that version of OpenMP or greater. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 20 ++++++++----- .../Lower/OpenMP/Todo/taskloop-reduction.f90 | 13 -------- .../test/Lower/OpenMP/taskloop-reduction.f90 | 30 +++++++++++++++++++ llvm/include/llvm/Frontend/OpenMP/OMP.td | 21 +++++++------ 4 files changed, 52 insertions(+), 32 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/Todo/taskloop-reduction.f90 create mode 100644 flang/test/Lower/OpenMP/taskloop-reduction.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index ad456d89bc432..51170a39d272b 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1763,21 +1763,22 @@ static void genTaskgroupClauses( cp.processTaskReduction(loc, clauseOps, taskReductionSyms); } -static void genTaskloopClauses(lower::AbstractConverter &converter, - semantics::SemanticsContext &semaCtx, - lower::StatementContext &stmtCtx, - const List &clauses, mlir::Location loc, - mlir::omp::TaskloopOperands &clauseOps) { +static void genTaskloopClauses( + lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, + lower::StatementContext &stmtCtx, const List &clauses, + mlir::Location loc, mlir::omp::TaskloopOperands &clauseOps, + llvm::SmallVectorImpl &taskReductionSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processGrainsize(stmtCtx, clauseOps); cp.processNumTasks(stmtCtx, clauseOps); + cp.processReduction(loc, clauseOps, taskReductionSyms); cp.processTODO(loc, llvm::omp::Directive::OMPD_taskloop); + clause::Priority, clause::Shared, clause::Untied>( + loc, llvm::omp::Directive::OMPD_taskloop); } static void genTaskwaitClauses(lower::AbstractConverter &converter, @@ -2979,8 +2980,9 @@ static mlir::omp::TaskloopOp genStandaloneTaskloop( lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::TaskloopOperands taskloopClauseOps; + llvm::SmallVector taskReductionSyms; genTaskloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, - taskloopClauseOps); + taskloopClauseOps, taskReductionSyms); DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, enableDelayedPrivatization, symTable); @@ -2994,6 +2996,8 @@ static mlir::omp::TaskloopOp genStandaloneTaskloop( EntryBlockArgs taskloopArgs; taskloopArgs.priv.syms = dsp.getDelayedPrivSymbols(); taskloopArgs.priv.vars = taskloopClauseOps.privateVars; + taskloopArgs.reduction.syms = taskReductionSyms; + taskloopArgs.reduction.vars = taskloopClauseOps.reductionVars; auto taskLoopOp = genWrapperOp( converter, loc, taskloopClauseOps, taskloopArgs); diff --git a/flang/test/Lower/OpenMP/Todo/taskloop-reduction.f90 b/flang/test/Lower/OpenMP/Todo/taskloop-reduction.f90 deleted file mode 100644 index 0c16bd227257f..0000000000000 --- a/flang/test/Lower/OpenMP/Todo/taskloop-reduction.f90 +++ /dev/null @@ -1,13 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s - -! CHECK: not yet implemented: Unhandled clause REDUCTION in TASKLOOP construct -subroutine omp_taskloop_reduction() - integer x - x = 0 - !$omp taskloop reduction(+:x) - do i = 1, 100 - x = x + 1 - end do - !$omp end taskloop -end subroutine omp_taskloop_reduction diff --git a/flang/test/Lower/OpenMP/taskloop-reduction.f90 b/flang/test/Lower/OpenMP/taskloop-reduction.f90 new file mode 100644 index 0000000000000..4185a927366e7 --- /dev/null +++ b/flang/test/Lower/OpenMP/taskloop-reduction.f90 @@ -0,0 +1,30 @@ +! This test checks the lowering of the reduction clause in the taskloop construct +! RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s +! RUN %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=45 -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-VERSION + +! CHECK-VERSION: error: REDUCTION clause is not allowed on directive TASKLOOP in OpenMP v4.5, try -fopenmp-version=50 + +! CHECK-LABEL: omp.private +! CHECK-SAME: {type = private} @[[I_PRIVATE:.*]] : i32 + +! CHECK-LABEL: func.func @_QPtest_reduction() +! CHECK: %[[ALLOCA_A:.*]] = fir.alloca !fir.array<10xi32> {bindc_name = "a", uniq_name = "_QFtest_reductionEa"} +! CHECK: %[[DECLARE_A:.*]]:2 = hlfir.declare %[[ALLOCA_A]](%2) {uniq_name = "_QFtest_reductionEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[ALLOCA_I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_reductionEi"} +! CHECK: %[[DECLARE_I:.*]]:2 = hlfir.declare %[[ALLOCA_I]] {uniq_name = "_QFtest_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ALLOCA_SUM_I:.*]] = fir.alloca i32 {bindc_name = "sum_i", uniq_name = "_QFtest_reductionEsum_i"} +! CHECK: %[[DECLARE_SUM_I:.*]]:2 = hlfir.declare %[[ALLOCA_SUM_I]] {uniq_name = "_QFtest_reductionEsum_i"} : (!fir.ref) -> (!fir.ref, !fir.ref) + +subroutine test_reduction + integer :: i, a(10), sum_i + + ! CHECK: omp.taskloop + ! CHECK-SAME: private(@[[I_PRIVATE]] %[[DECLARE_I]]#0 -> %arg0 : !fir.ref) reduction(@add_reduction_i32 %[[DECLARE_SUM_I]]#0 -> %arg1 : !fir.ref) { + !$omp taskloop reduction (+:sum_i) + do i = 1,10 + sum_i = sum_i + i + end do + !$omp end taskloop + +end subroutine \ No newline at end of file diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 208609f64f418..0afae8a013bd6 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -1284,17 +1284,16 @@ def OMP_TaskGroup : Directive<[Spelling<"taskgroup">]> { let category = CA_Executable; } def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> { - let allowedClauses = [ - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, + let allowedClauses = [VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause,