Skip to content

Commit 16f475a

Browse files
author
Advenam Tacet
committed
[ASan][libc++] Turn on ASan annotations for short strings
This commit turns on ASan annotations in `std::basic_string` for short stings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: #72677 Annotating `std::basic_string` with default allocator is implemented in #72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since dd1b7b7. You can turn off annotations for a specific allocator based on changes from 2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equals` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: [email protected] [email protected]
1 parent 31fd6d1 commit 16f475a

File tree

5 files changed

+420
-13
lines changed

5 files changed

+420
-13
lines changed

libcxx/include/string

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,6 @@ _LIBCPP_PUSH_MACROS
659659
#else
660660
# define _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS
661661
#endif
662-
#define _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED false
663662

664663
_LIBCPP_BEGIN_NAMESPACE_STD
665664

@@ -1899,7 +1898,7 @@ private:
18991898

19001899
// ASan: short string is poisoned if and only if this function returns true.
19011900
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __asan_short_string_is_annotated() const _NOEXCEPT {
1902-
return _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED && !__libcpp_is_constant_evaluated();
1901+
return !__libcpp_is_constant_evaluated();
19031902
}
19041903

19051904
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_new(size_type __current_size) const _NOEXCEPT {
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// REQUIRES: asan
10+
// UNSUPPORTED: c++03
11+
12+
#include <cassert>
13+
#include <array>
14+
#include "test_macros.h"
15+
#include "asan_testing.h" // includes deque and string - don't do it before
16+
17+
// This tests exists to check if strings work well with deque, as those
18+
// may be partialy annotated, we cannot simply call
19+
// is_double_ended_contiguous_container_asan_correct, as it assumes that
20+
// object memory inside is not annotated, so we check everything in a more careful way.
21+
22+
template <typename D>
23+
bool verify_inside(D const& d) {
24+
for (size_t i = 0; i < d.size(); ++i) {
25+
if (!is_string_asan_correct(d[i]))
26+
return false;
27+
}
28+
29+
return true;
30+
}
31+
32+
template <typename S, size_t N>
33+
S get_s(char c) {
34+
S s;
35+
for (size_t i = 0; i < N; ++i)
36+
s.push_back(c);
37+
38+
return s;
39+
}
40+
41+
template <class C, class S>
42+
void test_string() {
43+
size_t const N = sizeof(S) < 256 ? (4096 / sizeof(S)) : 16;
44+
45+
{
46+
C d1a(1), d1b(N), d1c(N + 1), d1d(32 * N);
47+
assert(verify_inside(d1a));
48+
assert(verify_inside(d1b));
49+
assert(verify_inside(d1c));
50+
assert(verify_inside(d1d));
51+
}
52+
{
53+
C d2;
54+
for (size_t i = 0; i < 16 * N; ++i) {
55+
d2.push_back(get_s<S, 1>(i % 10 + 'a'));
56+
assert(verify_inside(d2));
57+
d2.push_back(get_s<S, 222>(i % 10 + 'b'));
58+
assert(verify_inside(d2));
59+
60+
d2.pop_front();
61+
assert(verify_inside(d2));
62+
}
63+
}
64+
{
65+
C d3;
66+
for (size_t i = 0; i < 16 * N; ++i) {
67+
d3.push_front(get_s<S, 1>(i % 10 + 'a'));
68+
assert(verify_inside(d3));
69+
d3.push_front(get_s<S, 222>(i % 10 + 'b'));
70+
assert(verify_inside(d3));
71+
72+
d3.pop_back();
73+
assert(verify_inside(d3));
74+
}
75+
}
76+
{
77+
C d4;
78+
for (size_t i = 0; i < 16 * N; ++i) {
79+
// When there is no SSO, all elements inside should not be poisoned,
80+
// so we can verify deque poisoning.
81+
d4.push_front(get_s<S, 333>(i % 10 + 'a'));
82+
assert(verify_inside(d4));
83+
assert(is_double_ended_contiguous_container_asan_correct(d4));
84+
d4.push_back(get_s<S, 222>(i % 10 + 'b'));
85+
assert(verify_inside(d4));
86+
assert(is_double_ended_contiguous_container_asan_correct(d4));
87+
}
88+
}
89+
{
90+
C d5;
91+
for (size_t i = 0; i < 5 * N; ++i) {
92+
// In d4 we never had poisoned memory inside deque.
93+
// Here we start with SSO, so part of the inside of the container,
94+
// will be poisoned.
95+
d5.push_front(S());
96+
assert(verify_inside(d5));
97+
}
98+
for (size_t i = 0; i < d5.size(); ++i) {
99+
// We change the size to have long string.
100+
// Memory owne by deque should not be poisoned by string.
101+
d5[i].resize(1000);
102+
assert(verify_inside(d5));
103+
}
104+
105+
assert(is_double_ended_contiguous_container_asan_correct(d5));
106+
107+
d5.erase(d5.begin() + 2);
108+
assert(verify_inside(d5));
109+
110+
d5.erase(d5.end() - 2);
111+
assert(verify_inside(d5));
112+
113+
assert(is_double_ended_contiguous_container_asan_correct(d5));
114+
}
115+
{
116+
C d6a;
117+
assert(is_double_ended_contiguous_container_asan_correct(d6a));
118+
119+
C d6b(N + 2, get_s<S, 1000>('a'));
120+
d6b.push_front(get_s<S, 1001>('b'));
121+
while (!d6b.empty()) {
122+
d6b.pop_back();
123+
assert(is_double_ended_contiguous_container_asan_correct(d6b));
124+
}
125+
126+
C d6c(N + 2, get_s<S, 1002>('c'));
127+
while (!d6c.empty()) {
128+
d6c.pop_back();
129+
assert(is_double_ended_contiguous_container_asan_correct(d6c));
130+
}
131+
}
132+
{
133+
C d7(9 * N + 2);
134+
135+
d7.insert(d7.begin() + 1, S());
136+
assert(verify_inside(d7));
137+
138+
d7.insert(d7.end() - 3, S());
139+
assert(verify_inside(d7));
140+
141+
d7.insert(d7.begin() + 2 * N, get_s<S, 1>('a'));
142+
assert(verify_inside(d7));
143+
144+
d7.insert(d7.end() - 2 * N, get_s<S, 1>('b'));
145+
assert(verify_inside(d7));
146+
147+
d7.insert(d7.begin() + 2 * N, 3 * N, get_s<S, 1>('c'));
148+
assert(verify_inside(d7));
149+
150+
// It may not be short for big element types, but it will be checked correctly:
151+
d7.insert(d7.end() - 2 * N, 3 * N, get_s<S, 2>('d'));
152+
assert(verify_inside(d7));
153+
154+
d7.erase(d7.begin() + 2);
155+
assert(verify_inside(d7));
156+
157+
d7.erase(d7.end() - 2);
158+
assert(verify_inside(d7));
159+
}
160+
}
161+
162+
template <class S>
163+
void test_container() {
164+
test_string<std::deque<S, std::allocator<S>>, S>();
165+
test_string<std::deque<S, min_allocator<S>>, S>();
166+
test_string<std::deque<S, safe_allocator<S>>, S>();
167+
}
168+
169+
int main(int, char**) {
170+
// Those tests support only types based on std::basic_string.
171+
test_container<std::string>();
172+
test_container<std::wstring>();
173+
#if TEST_STD_VER >= 11
174+
test_container<std::u16string>();
175+
test_container<std::u32string>();
176+
#endif
177+
#if TEST_STD_VER >= 20
178+
test_container<std::u8string>();
179+
#endif
180+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// REQUIRES: asan
10+
// UNSUPPORTED: c++03
11+
12+
// <string>
13+
14+
// Basic test if ASan annotations work for short strings.
15+
16+
#include <string>
17+
#include <cassert>
18+
#include <cstdlib>
19+
20+
#include "asan_testing.h"
21+
#include "min_allocator.h"
22+
#include "test_iterators.h"
23+
#include "test_macros.h"
24+
25+
extern "C" void __sanitizer_set_death_callback(void (*callback)(void));
26+
27+
void do_exit() { exit(0); }
28+
29+
int main(int, char**) {
30+
{
31+
typedef cpp17_input_iterator<char*> MyInputIter;
32+
// Should not trigger ASan.
33+
std::basic_string<char, std::char_traits<char>, safe_allocator<char>> v;
34+
char i[] = {'a', 'b', 'c', 'd'};
35+
36+
v.insert(v.begin(), MyInputIter(i), MyInputIter(i + 4));
37+
assert(v[0] == 'a');
38+
assert(is_string_asan_correct(v));
39+
}
40+
41+
__sanitizer_set_death_callback(do_exit);
42+
{
43+
using T = char;
44+
using C = std::basic_string<T, std::char_traits<T>, safe_allocator<T>>;
45+
const T t[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g'};
46+
C c(std::begin(t), std::end(t));
47+
assert(is_string_asan_correct(c));
48+
assert(__sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
49+
0);
50+
volatile T foo = c[c.size() + 1]; // should trigger ASAN. Use volatile to prevent being optimized away.
51+
assert(false); // if we got here, ASAN didn't trigger
52+
((void)foo);
53+
}
54+
}

0 commit comments

Comments
 (0)