Skip to content

Commit 183f1ac

Browse files
authored
[LLVM] Fix an ASAN error in lookupLLVMIntrinsicByName (#147444)
Fix unnecessary conversion of C-String to StringRef in the `Cmp` lambda inside `lookupLLVMIntrinsicByName`. This both fixes an ASAN error in the code that happens when the `Name` StringRef passed in is not a Null terminated StringRef, and additionally can potentially speed up the code as well by eliminating the unnecessary computation of string length every time a C String is converted to StringRef in this code (It seems practically this computation is eliminated in optimized builds, but this will avoid it in O0 builds as well). Added a unit test that demonstrates this issue by building LLVM with these options: ``` CMAKE_BUILD_TYPE=Debug LLVM_USE_SANITIZER=Address LLVM_OPTIMIZE_SANITIZED_BUILDS=OFF ``` The error reported is as follows: ``` ==462665==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x5030000391a2 at pc 0x56525cc30bbf bp 0x7fff9e4ccc60 sp 0x7fff9e4cc428 READ of size 19 at 0x5030000391a2 thread T0 #0 0x56525cc30bbe in strlen (upstream-llvm-second/llvm-project/build/unittests/IR/IRTests+0x713bbe) (BuildId: 0651acf1e582a4d2) #1 0x7f8ff22ad334 in std::char_traits<char>::length(char const*) /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/char_traits.h:399:9 #2 0x7f8ff22a34a0 in llvm::StringRef::StringRef(char const*) /home/rjoshi/upstream-llvm-second/llvm-project/llvm/include/llvm/ADT/StringRef.h:96:33 #3 0x7f8ff28ca184 in _ZZL25lookupLLVMIntrinsicByNameN4llvm8ArrayRefIjEENS_9StringRefES2_ENK3$_0clIjPKcEEDaT_T0_ upstream-llvm-second/llvm-project/llvm/lib/IR/Intrinsics.cpp:673:18 ```
1 parent c9f03b8 commit 183f1ac

File tree

3 files changed

+46
-16
lines changed

3 files changed

+46
-16
lines changed

llvm/include/llvm/ADT/StringTable.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,18 @@ class StringTable {
8585
"Last byte must be a null byte.");
8686
}
8787

88-
// Get a string from the table starting with the provided offset. The returned
89-
// `StringRef` is in fact null terminated, and so can be converted safely to a
90-
// C-string if necessary for a system API.
91-
constexpr StringRef operator[](Offset O) const {
88+
// Returns the raw C string from the table starting with the provided offset.
89+
// The returned string is null terminated.
90+
constexpr const char *getCString(Offset O) const {
9291
assert(O.value() < Table.size() && "Out of bounds offset!");
9392
return Table.data() + O.value();
9493
}
9594

95+
// Get a string from the table starting with the provided offset. The returned
96+
// `StringRef` is in fact null terminated, and so can be converted safely to a
97+
// C-string if necessary for a system API.
98+
constexpr StringRef operator[](Offset O) const { return getCString(O); }
99+
96100
/// Returns the byte size of the table.
97101
constexpr size_t size() const { return Table.size(); }
98102

llvm/lib/IR/Intrinsics.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -661,20 +661,20 @@ static int lookupLLVMIntrinsicByName(ArrayRef<unsigned> NameOffsetTable,
661661
// `equal_range` requires the comparison to work with either side being an
662662
// offset or the value. Detect which kind each side is to set up the
663663
// compared strings.
664-
StringRef LHSStr;
665-
if constexpr (std::is_integral_v<decltype(LHS)>) {
666-
LHSStr = IntrinsicNameTable[LHS];
667-
} else {
664+
const char *LHSStr;
665+
if constexpr (std::is_integral_v<decltype(LHS)>)
666+
LHSStr = IntrinsicNameTable.getCString(LHS);
667+
else
668668
LHSStr = LHS;
669-
}
670-
StringRef RHSStr;
671-
if constexpr (std::is_integral_v<decltype(RHS)>) {
672-
RHSStr = IntrinsicNameTable[RHS];
673-
} else {
669+
670+
const char *RHSStr;
671+
if constexpr (std::is_integral_v<decltype(RHS)>)
672+
RHSStr = IntrinsicNameTable.getCString(RHS);
673+
else
674674
RHSStr = RHS;
675-
}
676-
return strncmp(LHSStr.data() + CmpStart, RHSStr.data() + CmpStart,
677-
CmpEnd - CmpStart) < 0;
675+
676+
return strncmp(LHSStr + CmpStart, RHSStr + CmpStart, CmpEnd - CmpStart) <
677+
0;
678678
};
679679
LastLow = Low;
680680
std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp);

llvm/unittests/IR/IntrinsicsTest.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,32 @@ TEST(IntrinsicNameLookup, Basic) {
8080
EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i1024"));
8181
}
8282

83+
TEST(IntrinsicNameLookup, NonNullterminatedStringRef) {
84+
using namespace Intrinsic;
85+
// This reproduces an issue where lookupIntrinsicID() can access memory beyond
86+
// the bounds of the passed in StringRef. For ASAN to catch this as an error,
87+
// create a StringRef using heap allocated memory and make it not null
88+
// terminated.
89+
90+
// ASAN will report a "AddressSanitizer: heap-buffer-overflow" error in
91+
// `lookupLLVMIntrinsicByName` when LLVM is built with these options:
92+
// -DCMAKE_BUILD_TYPE=Debug
93+
// -DLLVM_USE_SANITIZER=Address
94+
// -DLLVM_OPTIMIZE_SANITIZED_BUILDS=OFF
95+
96+
// Make an intrinsic name "llvm.memcpy.inline" on the heap.
97+
std::string Name = "llvm.memcpy.inline";
98+
assert(Name.size() == 18);
99+
// Create a StringRef backed by heap allocated memory such that OOB access
100+
// in that StringRef can be flagged by asan. Here, the String `S` is of size
101+
// 18, and backed by a heap allocated buffer `Data`, so access to S[18] will
102+
// be flagged bby asan.
103+
auto Data = std::make_unique<char[]>(Name.size());
104+
std::strncpy(Data.get(), Name.data(), Name.size());
105+
StringRef S(Data.get(), Name.size());
106+
EXPECT_EQ(memcpy_inline, lookupIntrinsicID(S));
107+
}
108+
83109
// Tests to verify getIntrinsicForClangBuiltin.
84110
TEST(IntrinsicNameLookup, ClangBuiltinLookup) {
85111
using namespace Intrinsic;

0 commit comments

Comments
 (0)