From 13983a273ef3b4d7d79d98a076de6822c17973be Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Thu, 22 Aug 2024 14:28:28 -0700 Subject: [PATCH 01/11] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20?= =?UTF-8?q?changes=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 [skip ci] --- llvm/include/llvm/IR/DataLayout.h | 28 +++++++++++++++++----------- llvm/lib/IR/DataLayout.cpp | 30 +++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 2f06bda6c30a5..953deb6653cc9 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -78,7 +78,11 @@ class DataLayout { Align ABIAlign; Align PrefAlign; uint32_t IndexBitWidth; - + /// Pointers in this address space don't have a well-defined bitwise + /// representation (e.g. may be relocated by a copying garbage collector). + /// Additionally, they may also be non-integral (i.e. containing additional + /// metadata such as bounds information/permissions). + bool IsNonIntegral = false; bool operator==(const PointerSpec &Other) const; }; @@ -133,10 +137,6 @@ class DataLayout { // The StructType -> StructLayout map. mutable void *LayoutMap = nullptr; - /// Pointers in these address spaces are non-integral, and don't have a - /// well-defined bitwise representation. - SmallVector NonIntegralAddressSpaces; - /// Sets or updates the specification for the given primitive type. void setPrimitiveSpec(char Specifier, uint32_t BitWidth, Align ABIAlign, Align PrefAlign); @@ -147,7 +147,8 @@ class DataLayout { /// Sets or updates the specification for pointer in the given address space. 
void setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign, - Align PrefAlign, uint32_t IndexBitWidth); + Align PrefAlign, uint32_t IndexBitWidth, + bool IsNonIntegral); /// Internal helper to get alignment for integer of given bitwidth. Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const; @@ -165,7 +166,8 @@ class DataLayout { Error parsePointerSpec(StringRef Spec); /// Attempts to parse a single specification. - Error parseSpecification(StringRef Spec); + Error parseSpecification(StringRef Spec, + SmallVectorImpl &NonIntegralAddressSpaces); /// Attempts to parse a data layout string. Error parseLayoutString(StringRef LayoutString); @@ -343,13 +345,17 @@ class DataLayout { /// Return the address spaces containing non-integral pointers. Pointers in /// this address space don't have a well-defined bitwise representation. - ArrayRef getNonIntegralAddressSpaces() const { - return NonIntegralAddressSpaces; + SmallVector getNonIntegralAddressSpaces() const { + SmallVector AddrSpaces; + for (const PointerSpec &PS : PointerSpecs) { + if (PS.IsNonIntegral) + AddrSpaces.push_back(PS.AddrSpace); + } + return AddrSpaces; } bool isNonIntegralAddressSpace(unsigned AddrSpace) const { - ArrayRef NonIntegralSpaces = getNonIntegralAddressSpaces(); - return is_contained(NonIntegralSpaces, AddrSpace); + return getPointerSpec(AddrSpace).IsNonIntegral; } bool isNonIntegralPointerType(PointerType *PT) const { diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index d295d1f5785eb..a7a1fa8ef03ed 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -151,7 +151,8 @@ bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const { bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const { return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth && ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign && - IndexBitWidth == Other.IndexBitWidth; + IndexBitWidth == 
Other.IndexBitWidth && + IsNonIntegral == Other.IsNonIntegral; } namespace { @@ -206,7 +207,8 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = { // Default pointer type specifications. constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = { - {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64} // p0:64:64:64:64 + {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, + false} // p0:64:64:64:64 }; DataLayout::DataLayout() @@ -239,13 +241,11 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) { PointerSpecs = Other.PointerSpecs; StructABIAlignment = Other.StructABIAlignment; StructPrefAlignment = Other.StructPrefAlignment; - NonIntegralAddressSpaces = Other.NonIntegralAddressSpaces; return *this; } bool DataLayout::operator==(const DataLayout &Other) const { // NOTE: StringRepresentation might differ, it is not canonicalized. - // FIXME: NonIntegralAddressSpaces isn't compared. return BigEndian == Other.BigEndian && AllocaAddrSpace == Other.AllocaAddrSpace && ProgramAddrSpace == Other.ProgramAddrSpace && @@ -454,11 +454,13 @@ Error DataLayout::parsePointerSpec(StringRef Spec) { return createStringError( "index size cannot be larger than the pointer size"); - setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth); + setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth, + false); return Error::success(); } -Error DataLayout::parseSpecification(StringRef Spec) { +Error DataLayout::parseSpecification( + StringRef Spec, SmallVectorImpl &NonIntegralAddressSpaces) { // The "ni" specifier is the only two-character specifier. Handle it first. if (Spec.starts_with("ni")) { // ni:
<address space>[:<address space>
]... @@ -614,12 +616,21 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) { // Split the data layout string into specifications separated by '-' and // parse each specification individually, updating internal data structures. + SmallVector NonIntegralAddressSpaces; for (StringRef Spec : split(LayoutString, '-')) { if (Spec.empty()) return createStringError("empty specification is not allowed"); - if (Error Err = parseSpecification(Spec)) + if (Error Err = parseSpecification(Spec, NonIntegralAddressSpaces)) return Err; } + // Mark all address spaces that were qualified as non-integral now. This has + // to be done later since the non-integral property is not part of the data + // layout pointer specification. + for (unsigned AS : NonIntegralAddressSpaces) { + const PointerSpec &PS = getPointerSpec(AS); + setPointerSpec(AS, PS.BitWidth, PS.ABIAlign, PS.PrefAlign, PS.IndexBitWidth, + true); + } return Error::success(); } @@ -666,16 +677,17 @@ DataLayout::getPointerSpec(uint32_t AddrSpace) const { void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign, Align PrefAlign, - uint32_t IndexBitWidth) { + uint32_t IndexBitWidth, bool IsNonIntegral) { auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace()); if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) { PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign, - IndexBitWidth}); + IndexBitWidth, IsNonIntegral}); } else { I->BitWidth = BitWidth; I->ABIAlign = ABIAlign; I->PrefAlign = PrefAlign; I->IndexBitWidth = IndexBitWidth; + I->IsNonIntegral = IsNonIntegral; } } From e4bd1181d160b8728e7d4158417a83e183bd1709 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Thu, 22 Aug 2024 14:36:04 -0700 Subject: [PATCH 02/11] fix indentation in langref Created using spr 1.3.6-beta.1 --- llvm/docs/LangRef.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 
200224c78be00..1a59fba65815c 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3103,19 +3103,19 @@ as follows: ``A
<address space>`` Specifies the address space of objects created by '``alloca``'. Defaults to the default address space of 0. -``p[<flags>][n]:<size>:<abi>[:<pref>][:<idx>]`` +``p[<flags>][<address space>
]:<size>:<abi>[:<pref>][:<idx>]`` This specifies the *size* of a pointer and its ``<abi>`` and ``<pref>``\erred alignments for address space ``n``. ``<pref>`` is optional and defaults to ``<abi>``. The fourth parameter ``<idx>`` is the size of the index that used for address calculation, which must be less than or equal to the pointer size. If not specified, the default index size is equal to the pointer size. All sizes - are in bits. The address space, ``n``, is optional, and if not specified, - denotes the default address space 0. The value of ``n`` must be - in the range [1,2^24). + are in bits. The ``<address space>
``, is optional, and if not specified, + denotes the default address space 0. The value of ``<address space>
`` must + be in the range [1,2^24). The optional```` are used to specify properties of pointers in this -address space: the character ``u`` marks pointers as having an unstable - representation and ```n`` marks pointers as non-integral (i.e. having + address space: the character ``u`` marks pointers as having an unstable + representation and ``n`` marks pointers as non-integral (i.e. having additional metadata). See :ref:`Non-Integral Pointer Types `. ``i:[:]`` From db97145d3a653f2999b5935f9b1cb4550230689d Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 25 Oct 2024 12:51:11 -0700 Subject: [PATCH 03/11] include feedback Created using spr 1.3.6-beta.1 --- llvm/docs/LangRef.rst | 30 +++++++++++++++++------------- llvm/include/llvm/IR/DataLayout.h | 8 ++++---- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index c137318af678b..3c3d0e0b4ab8e 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -659,7 +659,7 @@ LLVM IR optionally allows the frontend to denote pointers in certain address spaces as "non-integral" or "unstable" (or both "non-integral" and "unstable") via the :ref:`datalayout string`. -These exact implications of these properties are target-specific, but the +The exact implications of these properties are target-specific, but the following IR semantics and restrictions to optimization passes apply: Unstable pointer representation @@ -668,7 +668,7 @@ Unstable pointer representation Pointers in this address space have an *unspecified* bitwise representation (i.e. not backed by a fixed integer). The bitwise pattern of such pointers is allowed to change in a target-specific way. For example, this could be a pointer -type used for with copying garbage collection where the garbage collector could +type used with copying garbage collection where the garbage collector could update the pointer at any time in the collection sweep. 
``inttoptr`` and ``ptrtoint`` instructions have the same semantics as for @@ -705,10 +705,10 @@ representation of the pointer. Non-integral pointer representation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Pointers are not represented as an address, but may instead include +Pointers are not represented as just an address, but may instead include additional metadata such as bounds information or a temporal identifier. Examples include AMDGPU buffer descriptors with a 128-bit fat pointer and a -32-bit offset or CHERI capabilities that contain bounds, permissions and an +32-bit offset, or CHERI capabilities that contain bounds, permissions and an out-of-band validity bit. In general, these pointers cannot be re-created from just an integer value. @@ -716,23 +716,25 @@ In most cases pointers with a non-integral representation behave exactly the same as an integral pointer, the only difference is that it is not possible to create a pointer just from an address. -"Non-integral" pointers also impose restrictions on the optimizer, but in -general these are less restrictive than for "unstable" pointers. The main +"Non-integral" pointers also impose restrictions on transformation passes, but +in general these are less restrictive than for "unstable" pointers. The main difference compared to integral pointers is that ``inttoptr`` instructions should not be inserted by passes as they may not be able to create a valid pointer. This property also means that ``inttoptr(ptrtoint(x))`` cannot be folded to ``x`` as the ``ptrtoint`` operation may destroy the necessary metadata to reconstruct the pointer. -Additionaly, since there could be out-of-band state, it is also not legal to +Additionally, since there could be out-of-band state, it is also not legal to convert a load/store of a non-integral pointer type to a load/store of an -integer type with same bitwidth as that may not copy all the state. 
-However, it is legal to use appropriately aligned ``llvm.memcpy`` and -``llvm.memmove`` for copies of non-integral pointers as long as these are not -converted into integer operations. +integer type with same bitwidth, as that may not copy all the state. +However, it is legal to use appropriately-aligned ``llvm.memcpy`` and +``llvm.memmove`` for copies of non-integral pointers. +NOTE: Lowering of ``llvm.memcpy`` containing non-integral pointer types must use +appropriately-aligned and sized types instead of smaller integer types. Unlike "unstable" pointers, the bit-wise representation is stable and -``ptrtoint(x)`` always yields a deterministic values. -This means optimizer is still permitted to insert new ``ptrtoint`` instructions. +``ptrtoint(x)`` always yields a deterministic value. +This means transformation passes are still permitted to insert new ``ptrtoint`` +instructions. However, it is important to note that ``ptrtoint`` may not yield the same value as storing the pointer via memory and reading it back as an integer, even if the bitwidth of the two types matches (since ptrtoint could involve some form of @@ -12187,6 +12189,8 @@ If ``value`` is smaller than ``ty2`` then a zero extension is done. If ``value`` is larger than ``ty2`` then a truncation is done. If they are the same size, then nothing is done (*no-op cast*) other than a type change. +For :ref:`non-integral pointers <_nointptrtype>` the ``ptrtoint`` instruction +may involve additional transformations beyond truncations or extension. Example: """""""" diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index ca185bfec851a..206abcdbea0a3 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -357,8 +357,8 @@ class DataLayout { /// instructions operating on pointers of this address space. /// TODO: remove this function after migrating to finer-grained properties. 
bool isNonIntegralAddressSpace(unsigned AddrSpace) const { - const PointerSpec &PS = getPointerSpec(AddrSpace); - return PS.HasNonIntegralRepresentation || PS.HasUnstableRepresentation; + return hasUnstableRepresentation(AddrSpace) || + hasNonIntegralRepresentation(AddrSpace); } /// Returns whether this address space has an "unstable" pointer @@ -390,8 +390,8 @@ class DataLayout { /// representations (hasUnstableRepresentation()) unless the pass knows it is /// within a critical section that retains the current representation. bool shouldAvoidIntToPtr(unsigned AddrSpace) const { - const PointerSpec &PS = getPointerSpec(AddrSpace); - return PS.HasNonIntegralRepresentation || PS.HasUnstableRepresentation; + return hasUnstableRepresentation(AddrSpace) || + hasNonIntegralRepresentation(AddrSpace); } /// Returns whether passes should avoid introducing `ptrtoint` instructions From 94ecfa353dcf44087797594a8f77f9653c8b8e4a Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 25 Oct 2024 14:54:59 -0700 Subject: [PATCH 04/11] address more feedback Created using spr 1.3.6-beta.1 --- llvm/docs/LangRef.rst | 16 ++++++---- llvm/include/llvm/IR/DataLayout.h | 6 ++-- llvm/lib/IR/DataLayout.cpp | 5 +-- llvm/unittests/IR/DataLayoutTest.cpp | 46 ++++++++++++++++------------ 4 files changed, 43 insertions(+), 30 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 3c3d0e0b4ab8e..2313527afedd7 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -709,8 +709,10 @@ Pointers are not represented as just an address, but may instead include additional metadata such as bounds information or a temporal identifier. Examples include AMDGPU buffer descriptors with a 128-bit fat pointer and a 32-bit offset, or CHERI capabilities that contain bounds, permissions and an -out-of-band validity bit. In general, these pointers cannot be re-created -from just an integer value. +out-of-band validity bit. 
In general, valid non-integral pointers cannot be +created from just an integer value: while ``inttoptr`` yields a deterministic +bitwise pattern, the resulting value is not guaranteed to be a valid +dereferenceable pointer. In most cases pointers with a non-integral representation behave exactly the same as an integral pointer, the only difference is that it is not possible to @@ -3200,9 +3202,11 @@ as follows: this set are considered to support most general arithmetic operations efficiently. ``ni:
<address space0>:<address space1>:<address space2>
...`` - This specifies pointer types with the specified address spaces - as :ref:`Non-Integral Pointer Type ` s. The ``0`` - address space cannot be specified as non-integral. + This marks pointer types with the specified address spaces + as :ref:`non-integral and unstable `. + The ``0`` address space cannot be specified as non-integral. + It is only supported for backwards compatibility, the flags of the ``p`` + specifier should be used instead for new code. On every specification that takes a ``:``, specifying the ```` alignment is optional. If omitted, the preceding ``:`` @@ -12189,7 +12193,7 @@ If ``value`` is smaller than ``ty2`` then a zero extension is done. If ``value`` is larger than ``ty2`` then a truncation is done. If they are the same size, then nothing is done (*no-op cast*) other than a type change. -For :ref:`non-integral pointers <_nointptrtype>` the ``ptrtoint`` instruction +For :ref:`non-integral pointers ` the ``ptrtoint`` instruction may involve additional transformations beyond truncations or extension. Example: diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 206abcdbea0a3..af9556feb724f 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -341,9 +341,9 @@ class DataLayout { /// rounded up to a whole number of bytes. unsigned getIndexSize(unsigned AS) const; - /// Return the address spaces containing non-integral pointers. Pointers in - /// this address space don't have a well-defined bitwise representation. - SmallVector getNonIntegralAddressSpaces() const { + /// Return the address spaces with special pointer semantics (such as being + /// unstable or non-integral). 
+ SmallVector getNonStandardAddressSpaces() const { SmallVector AddrSpaces; for (const PointerSpec &PS : PointerSpecs) { if (PS.HasNonIntegralRepresentation || PS.HasUnstableRepresentation) diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index 722f7b57d160e..9de984175228f 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -209,7 +209,7 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = { // Default pointer type specifications. constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = { // p0:64:64:64:64 - {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false}, + {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false, false}, }; DataLayout::DataLayout() @@ -437,7 +437,8 @@ Error DataLayout::parsePointerSpec(StringRef Spec) { return Err; } if (AddrSpace == 0 && (NonIntegralRepr || UnstableRepr)) - return createStringError("address space 0 cannot be non-integral"); + return createStringError( + "address space 0 cannot be non-integral or unstable"); // Size. Required, cannot be zero. 
unsigned BitWidth; diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp index 056584badcf74..8b6616ce0fb16 100644 --- a/llvm/unittests/IR/DataLayoutTest.cpp +++ b/llvm/unittests/IR/DataLayoutTest.cpp @@ -412,7 +412,7 @@ TEST(DataLayout, ParsePointerSpec) { "pn0:64:64", "pu0:64:64", "pun0:64:64", "pnu0:64:64"}) EXPECT_THAT_EXPECTED( DataLayout::parse(Str), - FailedWithMessage("address space 0 cannot be non-integral")); + FailedWithMessage("address space 0 cannot be non-integral or unstable")); } TEST(DataLayoutTest, ParseNativeIntegersSpec) { @@ -569,12 +569,12 @@ TEST(DataLayout, GetPointerPrefAlignment) { TEST(DataLayout, IsNonIntegralAddressSpace) { DataLayout Default; - EXPECT_THAT(Default.getNonIntegralAddressSpaces(), ::testing::SizeIs(0)); + EXPECT_THAT(Default.getNonStandardAddressSpaces(), ::testing::SizeIs(0)); EXPECT_FALSE(Default.isNonIntegralAddressSpace(0)); EXPECT_FALSE(Default.isNonIntegralAddressSpace(1)); DataLayout Custom = cantFail(DataLayout::parse("ni:2:16777215")); - EXPECT_THAT(Custom.getNonIntegralAddressSpaces(), + EXPECT_THAT(Custom.getNonStandardAddressSpaces(), ::testing::ElementsAreArray({2U, 16777215U})); EXPECT_FALSE(Custom.isNonIntegralAddressSpace(0)); EXPECT_FALSE(Custom.isNonIntegralAddressSpace(1)); @@ -582,37 +582,45 @@ TEST(DataLayout, IsNonIntegralAddressSpace) { EXPECT_TRUE(Custom.isNonIntegralAddressSpace(16777215)); // Pointers can be marked as non-integral using 'pn' - DataLayout NonIntegral = cantFail(DataLayout::parse("pn2:64:64:64:32")); - EXPECT_TRUE(NonIntegral.isNonIntegralAddressSpace(2)); - EXPECT_TRUE(NonIntegral.hasNonIntegralRepresentation(2)); - EXPECT_FALSE(NonIntegral.hasUnstableRepresentation(2)); - EXPECT_TRUE(NonIntegral.shouldAvoidIntToPtr(2)); - EXPECT_FALSE(NonIntegral.shouldAvoidPtrToInt(2)); + Custom = cantFail(DataLayout::parse("pn2:64:64:64:32")); + EXPECT_TRUE(Custom.isNonIntegralAddressSpace(2)); + EXPECT_TRUE(Custom.hasNonIntegralRepresentation(2)); + 
EXPECT_FALSE(Custom.hasUnstableRepresentation(2)); + EXPECT_TRUE(Custom.shouldAvoidIntToPtr(2)); + EXPECT_FALSE(Custom.shouldAvoidPtrToInt(2)); + EXPECT_THAT(Custom.getNonStandardAddressSpaces(), + ::testing::ElementsAreArray({2U})); // Pointers can be marked as unstable using 'pu' - DataLayout Unstable = cantFail(DataLayout::parse("pu2:64:64:64:32")); - EXPECT_TRUE(Unstable.isNonIntegralAddressSpace(2)); - EXPECT_TRUE(Unstable.hasUnstableRepresentation(2)); - EXPECT_FALSE(Unstable.hasNonIntegralRepresentation(2)); - EXPECT_TRUE(Unstable.shouldAvoidPtrToInt(2)); - EXPECT_TRUE(Unstable.shouldAvoidIntToPtr(2)); + Custom = cantFail(DataLayout::parse("pu2:64:64:64:32")); + EXPECT_TRUE(Custom.isNonIntegralAddressSpace(2)); + EXPECT_TRUE(Custom.hasUnstableRepresentation(2)); + EXPECT_FALSE(Custom.hasNonIntegralRepresentation(2)); + EXPECT_TRUE(Custom.shouldAvoidPtrToInt(2)); + EXPECT_TRUE(Custom.shouldAvoidIntToPtr(2)); + EXPECT_THAT(Custom.getNonStandardAddressSpaces(), + ::testing::ElementsAreArray({2U})); // Both properties can also be set using 'pnu'/'pun' - for (auto Layout : {"pnu2:64:64:64:32", "pun2:64:64:64:32"}) { + for (const auto *Layout : {"pnu2:64:64:64:32", "pun2:64:64:64:32"}) { DataLayout DL = cantFail(DataLayout::parse(Layout)); EXPECT_TRUE(DL.isNonIntegralAddressSpace(2)); EXPECT_TRUE(DL.hasNonIntegralRepresentation(2)); EXPECT_TRUE(DL.hasUnstableRepresentation(2)); + EXPECT_THAT(DL.getNonStandardAddressSpaces(), + ::testing::ElementsAreArray({2U})); } // For backwards compatibility, the ni DataLayout part overrides any p[n][u]. 
- for (auto Layout : {"ni:2-pn2:64:64:64:32", "ni:2-pnu2:64:64:64:32", - "ni:2-pu2:64:64:64:32", "pn2:64:64:64:32-ni:2", - "pnu2:64:64:64:32-ni:2", "pu2:64:64:64:32-ni:2"}) { + for (const auto *Layout : {"ni:2-pn2:64:64:64:32", "ni:2-pnu2:64:64:64:32", + "ni:2-pu2:64:64:64:32", "pn2:64:64:64:32-ni:2", + "pnu2:64:64:64:32-ni:2", "pu2:64:64:64:32-ni:2"}) { DataLayout DL = cantFail(DataLayout::parse(Layout)); EXPECT_TRUE(DL.isNonIntegralAddressSpace(2)); EXPECT_TRUE(DL.hasNonIntegralRepresentation(2)); EXPECT_TRUE(DL.hasUnstableRepresentation(2)); + EXPECT_THAT(DL.getNonStandardAddressSpaces(), + ::testing::ElementsAreArray({2U})); } } From de449dd8e32953e59a8e5fc594acee2930e003f9 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 21 Jul 2025 13:16:53 -0700 Subject: [PATCH 05/11] clang-format Created using spr 1.3.6-beta.1 --- llvm/include/llvm/IR/DataLayout.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 00302ec156126..6f4981e2b65b6 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -429,8 +429,7 @@ class DataLayout { /// representations (hasUnstableRepresentation()) unless the pass knows it is /// within a critical section that retains the current representation. 
bool shouldAvoidIntToPtr(unsigned AddrSpace) const { - return hasUnstableRepresentation(AddrSpace) || - hasExternalState(AddrSpace); + return hasUnstableRepresentation(AddrSpace) || hasExternalState(AddrSpace); } /// Returns whether passes should avoid introducing `ptrtoint` instructions From 2c49735c0cfd83c731dffbee626e5b9ace29ef0d Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Sun, 27 Jul 2025 13:57:55 -0700 Subject: [PATCH 06/11] typo fixes Created using spr 1.3.6-beta.1 --- llvm/docs/LangRef.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 3f1a0bd2fdc41..ef3464e657031 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -749,22 +749,22 @@ The ``inttoptr`` instruction does not recreate the external state and therefore it is target dependent whether it can be used to create a dereferenceable pointer. In general passes should assume that the result of such an inttoptr is not dereferenceable. For example, on CHERI targets an ``inttoptr`` will -yield a capability the external state (the validity tag bit) set to zero, +yield a capability with the external state (the validity tag bit) set to zero, which will cause any dereference to trap. -The ``ptrtotint`` instruction also only returns the "in-band" state and omit +The ``ptrtoint`` instruction also only returns the "in-band" state and omits all external state. These two properties mean that ``inttoptr(ptrtoint(x))`` cannot be folded to ``x`` since the ``ptrtoint`` operation does not include the external state needed to reconstruct the original pointer and ``inttoptr`` cannot set it. -When a ``store ptr addrspace(N) %p, ptr @dst`` of such a non-integral pointers -is performed, the external metadata is also stored to the implementation-defined +When a ``store ptr addrspace(N) %p, ptr @dst`` of such a non-integral pointer +is performed, the external metadata is also stored to an implementation-defined location. 
Similarly, a ``%val = load ptr addrspace(N), ptr @dst`` will fetch the external metadata and make it available for all uses of ``%val``. Similarly, the ``llvm.memcpy`` and ``llvm.memmove`` intrinsics also transfer the -external state. This is essential to allow frontends to efficiently emit of -copies of structures containing such pointers, since expanding all these copies -as individual loads and stores would affect compilation speed and inhibit +external state. This is essential to allow frontends to efficiently emit copies +of structures containing such pointers, since expanding all these copies as +individual loads and stores would affect compilation speed and inhibit optimizations. Notionally, these external bits are part of the pointer, but since From a08d1f91b74bad03c33903b0dbe8374a9d8b3873 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 19 Sep 2025 18:22:24 -0700 Subject: [PATCH 07/11] fix typo in langref Created using spr 1.3.7-beta.1 --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index baac35c551eab..a3e9f60dab515 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -725,7 +725,7 @@ representation of the pointer. Non-integral pointers with external state ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -A futher special case of non-integral pointers is ones that include external +A further special case of non-integral pointers is ones that include external state (such as bounds information or a type tag) with a target-defined size. 
An example of such a type is a CHERI capability, where there is an additional validity bit that is part of all pointer-typed registers, but is located in From e740d606ebad2ea1810ce08a5785e83fd7242efe Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 19 Sep 2025 18:27:00 -0700 Subject: [PATCH 08/11] fix tests after semantic change Created using spr 1.3.7-beta.1 --- .../ConstProp/inttoptr-gep-index-width.ll | 4 +++- .../SimplifyCFG/switch_create-custom-dl.ll | 20 +++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/inttoptr-gep-index-width.ll b/llvm/test/Transforms/InstSimplify/ConstProp/inttoptr-gep-index-width.ll index 03056e8361e21..2049def9b59b7 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/inttoptr-gep-index-width.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/inttoptr-gep-index-width.ll @@ -6,7 +6,9 @@ target datalayout = "p:16:16:16:8" ; The GEP should only modify the low 8 bits of the pointer. 
define ptr @test() { ; CHECK-LABEL: define ptr @test() { -; CHECK-NEXT: ret ptr inttoptr (i16 -256 to ptr) +; We need to use finer-grained DataLayout properties for non-integral pointers +; FIXME: Should be: ret ptr inttoptr (i16 -256 to ptr) +; CHECK-NEXT: ret ptr getelementptr (i8, ptr inttoptr (i16 -1 to ptr), i8 1) ; %base = inttoptr i16 -1 to ptr %gep = getelementptr i8, ptr %base, i8 1 diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll b/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll index 336fc5e14d758..ddf64591776dd 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll @@ -33,13 +33,14 @@ F: ; preds = %0 ret void } +; We need to use finer-grained DataLayout properties for non-integral pointers +; FIXME: Should be using a switch here define void @test1_ptr(ptr %V) { ; CHECK-LABEL: @test1_ptr( -; CHECK-NEXT: [[MAGICPTR:%.*]] = ptrtoint ptr [[V:%.*]] to i40 -; CHECK-NEXT: switch i40 [[MAGICPTR]], label [[F:%.*]] [ -; CHECK-NEXT: i40 17, label [[T:%.*]] -; CHECK-NEXT: i40 4, label [[T]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[C1:%.*]] = icmp eq ptr [[V:%.*]], inttoptr (i32 4 to ptr) +; CHECK-NEXT: [[C2:%.*]] = icmp eq ptr [[V]], inttoptr (i32 17 to ptr) +; CHECK-NEXT: [[CN:%.*]] = or i1 [[C1]], [[C2]] +; CHECK-NEXT: br i1 [[CN]], label [[T:%.*]], label [[F:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: T: @@ -63,11 +64,10 @@ F: ; preds = %0 define void @test1_ptr_as1(ptr addrspace(1) %V) { ; CHECK-LABEL: @test1_ptr_as1( -; CHECK-NEXT: [[MAGICPTR:%.*]] = ptrtoint ptr addrspace(1) [[V:%.*]] to i40 -; CHECK-NEXT: switch i40 [[MAGICPTR]], label [[F:%.*]] [ -; CHECK-NEXT: i40 17, label [[T:%.*]] -; CHECK-NEXT: i40 4, label [[T]] -; CHECK-NEXT: ] +; CHECK-NEXT: [[C1:%.*]] = icmp eq ptr addrspace(1) [[V:%.*]], inttoptr (i32 4 to ptr addrspace(1)) +; CHECK-NEXT: [[C2:%.*]] = icmp eq ptr addrspace(1) [[V]], inttoptr (i32 17 to ptr addrspace(1)) 
+; CHECK-NEXT: [[CN:%.*]] = or i1 [[C1]], [[C2]] +; CHECK-NEXT: br i1 [[CN]], label [[T:%.*]], label [[F:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: T: From 4fee21fb0ed600e32556143fc2063c0f52f947b0 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 19 Sep 2025 23:06:54 -0700 Subject: [PATCH 09/11] rebase, add Type* overloads Created using spr 1.3.7-beta.1 --- llvm/include/llvm/IR/DataLayout.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 51181fc292fe4..d7574365b4351 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -404,6 +404,10 @@ class DataLayout { bool hasUnstableRepresentation(unsigned AddrSpace) const { return getPointerSpec(AddrSpace).HasUnstableRepresentation; } + bool hasUnstableRepresentation(Type *Ty) const { + auto *PTy = dyn_cast(Ty->getScalarType()); + return PTy && hasUnstableRepresentation(PTy->getPointerAddressSpace()); + } /// Returns whether this address space has external state (implies having /// a non-integral pointer representation). @@ -416,6 +420,10 @@ class DataLayout { bool hasExternalState(unsigned AddrSpace) const { return getPointerSpec(AddrSpace).HasExternalState; } + bool hasExternalState(Type *Ty) const { + auto *PTy = dyn_cast(Ty->getScalarType()); + return PTy && hasExternalState(PTy->getPointerAddressSpace()); + } /// Returns whether passes should avoid introducing `inttoptr` instructions /// for this address space. 
From 82c5832b9d66c2b6d0ddca50390da10639b9de61 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Sun, 21 Sep 2025 14:25:51 -0700 Subject: [PATCH 10/11] remove no longer valid test check Created using spr 1.3.8-beta.1 --- llvm/unittests/IR/DataLayoutTest.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp index e0a87b8cfa731..7612674ed0099 100644 --- a/llvm/unittests/IR/DataLayoutTest.cpp +++ b/llvm/unittests/IR/DataLayoutTest.cpp @@ -415,10 +415,6 @@ TEST(DataLayout, ParsePointerSpec) { DataLayout::parse("p2n:32:32"), FailedWithMessage("address space must be a 24-bit integer")); - EXPECT_THAT_EXPECTED( - DataLayout::parse("pe2:64:64"), - FailedWithMessage("pointers with external state must be non-integral")); - // AS0 cannot be non-integral. for (StringRef Str : {"pe:64:64", "pu:64:64", "pue:64:64", "pe0:64:64", "pu0:64:64", "peu0:64:64"}) From faf0565e442cbbdf0d540e9b1a28752c217a6b7b Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 22 Sep 2025 11:45:57 -0700 Subject: [PATCH 11/11] typo fix Created using spr 1.3.8-beta.1 --- llvm/include/llvm/IR/DataLayout.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index d72e364bdfa8f..56fc749838ef9 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -388,7 +388,7 @@ class DataLayout { /// and an out-of-band validity bit. /// /// In general, more specialized functions such as mustNotIntroduceIntToPtr(), - /// mustNotIntroducePtrToIntPtrToInt(), or hasExternalState() should be + /// mustNotIntroducePtrToInt(), or hasExternalState() should be /// preferred over this one when reasoning about the behavior of IR /// analysis/transforms. /// TODO: should remove/deprecate this once all uses have migrated. 
@@ -447,7 +447,7 @@ class DataLayout { /// "unstable" representation (hasUnstableRepresentation()) since the /// bitwise pattern of such pointers could change unless the pass knows it is /// within a critical section that retains the current representation. - bool mustNotIntroducePtrToIntPtrToInt(unsigned AddrSpace) const { + bool mustNotIntroducePtrToInt(unsigned AddrSpace) const { return hasUnstableRepresentation(AddrSpace); } @@ -460,10 +460,9 @@ class DataLayout { return PTy && isNonIntegralPointerType(PTy); } - bool mustNotIntroducePtrToIntPtrToInt(Type *Ty) const { + bool mustNotIntroducePtrToInt(Type *Ty) const { auto *PTy = dyn_cast(Ty->getScalarType()); - return PTy && - mustNotIntroducePtrToIntPtrToInt(PTy->getPointerAddressSpace()); + return PTy && mustNotIntroducePtrToInt(PTy->getPointerAddressSpace()); } bool mustNotIntroduceIntToPtr(Type *Ty) const {