Skip to content

Commit 2d0a3da

Browse files
committed
[lldb] Support parsing data symbols from the Wasm name section
This PR adds support for parsing data symbols from the WebAssembly name section.
1 parent 63cc2e3 commit 2d0a3da

File tree

3 files changed

+161
-45
lines changed

3 files changed

+161
-45
lines changed

lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp

Lines changed: 92 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -251,19 +251,19 @@ bool ObjectFileWasm::ParseHeader() {
251251

252252
static llvm::Expected<std::vector<AddressRange>>
253253
ParseFunctions(SectionSP code_section_sp) {
254-
DataExtractor code_section_data;
255-
code_section_sp->GetSectionData(code_section_data);
254+
DataExtractor data;
255+
code_section_sp->GetSectionData(data);
256256
lldb::offset_t offset = 0;
257257

258-
const uint64_t function_count = code_section_data.GetULEB128(&offset);
258+
const uint64_t function_count = data.GetULEB128(&offset);
259259
if (function_count > std::numeric_limits<uint32_t>::max())
260260
return llvm::createStringError("function count overflows uint32_t");
261261

262262
std::vector<AddressRange> functions;
263263
functions.reserve(function_count);
264264

265265
for (uint32_t i = 0; i < function_count; ++i) {
266-
const uint64_t function_size = code_section_data.GetULEB128(&offset);
266+
const uint64_t function_size = data.GetULEB128(&offset);
267267
if (function_size > std::numeric_limits<uint32_t>::max())
268268
return llvm::createStringError("function size overflows uint32_t");
269269
// llvm-objdump considers the ULEB with the function size to be part of the
@@ -281,9 +281,45 @@ ParseFunctions(SectionSP code_section_sp) {
281281
return functions;
282282
}
283283

284+
/// Parse the Wasm data section into one AddressRange per data segment.
///
/// The section payload is read as a ULEB128 segment count followed, for each
/// segment, by a ULEB128 flags field, a ULEB128 byte size and the segment
/// bytes themselves. Each returned range is built from \p data_section_sp,
/// the offset of the segment bytes within the section data, and the segment
/// size. Ranges are stored in segment-index order so callers can look a
/// segment up by its index.
///
/// \param data_section_sp The data section whose contents are parsed.
/// \return The segment ranges, or an error if the segment count, a segment's
///         flags or a segment's size do not fit in 32 bits, or if advancing
///         past a segment would overflow the 64-bit offset.
static llvm::Expected<std::vector<AddressRange>>
ParseData(SectionSP data_section_sp) {
  DataExtractor data;
  data_section_sp->GetSectionData(data);

  lldb::offset_t offset = 0;

  const uint64_t segment_count = data.GetULEB128(&offset);
  if (segment_count > std::numeric_limits<uint32_t>::max())
    return llvm::createStringError("segment count overflows uint32_t");

  std::vector<AddressRange> segments;
  segments.reserve(segment_count);

  for (uint32_t i = 0; i < segment_count; ++i) {
    const uint64_t flags = data.GetULEB128(&offset);
    if (flags > std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("segment flags overflows uint32_t");

    // NOTE(review): the size is read immediately after the flags. For active
    // segments the Wasm binary format appears to place an optional memory
    // index and an offset init expression between the two; presumably those
    // need to be skipped here first. Confirm against the spec.
    const uint64_t segment_size = data.GetULEB128(&offset);
    // Validate the size itself (this check previously re-tested `flags`, so
    // an oversized segment size was never rejected).
    if (segment_size > std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("segment size overflows uint32_t");

    // `offset` now points at the first byte of the segment contents.
    segments.emplace_back(data_section_sp, offset, segment_size);

    // Step over the segment contents, guarding against wrap-around.
    std::optional<lldb::offset_t> next_offset =
        llvm::checkedAddUnsigned(offset, segment_size);
    if (!next_offset)
      return llvm::createStringError("segment offset overflows uint64_t");
    offset = *next_offset;
  }

  return segments;
}
318+
284319
static llvm::Expected<std::vector<Symbol>>
285320
ParseNames(SectionSP name_section_sp,
286-
const std::vector<AddressRange> &functions) {
321+
const std::vector<AddressRange> &function_ranges,
322+
const std::vector<AddressRange> &segment_ranges) {
287323
DataExtractor name_section_data;
288324
name_section_sp->GetSectionData(name_section_data);
289325

@@ -305,17 +341,34 @@ ParseNames(SectionSP name_section_sp,
305341
for (uint64_t i = 0; c && i < count; ++i) {
306342
const uint64_t idx = data.getULEB128(c);
307343
const std::optional<std::string> name = GetWasmString(data, c);
308-
if (!name || idx >= functions.size())
344+
if (!name || idx >= function_ranges.size())
309345
continue;
310346
symbols.emplace_back(
311347
symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
312348
/*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
313-
/*is_artificial=*/false, functions[idx],
349+
/*is_artificial=*/false, function_ranges[idx],
314350
/*size_is_valid=*/true, /*contains_linker_annotations=*/false,
315351
/*flags=*/0);
316352
}
317353
} break;
318-
case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
354+
case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
355+
const uint64_t count = data.getULEB128(c);
356+
if (count > std::numeric_limits<uint32_t>::max())
357+
return llvm::createStringError("data count overflows uint32_t");
358+
for (uint64_t i = 0; c && i < count; ++i) {
359+
const uint64_t idx = data.getULEB128(c);
360+
const std::optional<std::string> name = GetWasmString(data, c);
361+
if (!name || idx >= segment_ranges.size())
362+
continue;
363+
symbols.emplace_back(
364+
symbols.size(), Mangled(*name), lldb::eSymbolTypeData,
365+
/*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
366+
/*is_artificial=*/false, segment_ranges[idx],
367+
/*size_is_valid=*/true, /*contains_linker_annotations=*/false,
368+
/*flags=*/0);
369+
}
370+
371+
} break;
319372
case llvm::wasm::WASM_NAMES_GLOBAL:
320373
case llvm::wasm::WASM_NAMES_LOCAL:
321374
default:
@@ -336,21 +389,35 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
336389
assert(m_sections_up && "sections must be parsed");
337390
Log *log = GetLog(LLDBLog::Object);
338391

339-
// The name section contains names and indexes. First parse the functions from
340-
// the code section so we can access them by their index.
341-
SectionSP code_section_sp =
342-
m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false);
343-
if (!code_section_sp) {
344-
LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section");
345-
return;
392+
// The name section contains names and indexes. First parse the data from the
393+
// relevant sections so we can access it by its index.
394+
std::vector<AddressRange> function_ranges;
395+
std::vector<AddressRange> segment_ranges;
396+
397+
// Parse the code section.
398+
if (SectionSP code_section_sp =
399+
m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) {
400+
llvm::Expected<std::vector<AddressRange>> functions =
401+
ParseFunctions(code_section_sp);
402+
if (!functions) {
403+
LLDB_LOG_ERROR(log, functions.takeError(),
404+
"Failed to parse Wasm code section: {0}");
405+
return;
406+
}
407+
function_ranges = *functions;
346408
}
347409

348-
llvm::Expected<std::vector<AddressRange>> functions =
349-
ParseFunctions(code_section_sp);
350-
if (!functions) {
351-
LLDB_LOG_ERROR(log, functions.takeError(),
352-
"Failed to parse Wasm functions: {0}");
353-
return;
410+
// Parse the data section.
411+
if (SectionSP data_section_sp =
412+
m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) {
413+
llvm::Expected<std::vector<AddressRange>> segments =
414+
ParseData(data_section_sp);
415+
if (!segments) {
416+
LLDB_LOG_ERROR(log, segments.takeError(),
417+
"Failed to parse Wasm data section: {0}");
418+
return;
419+
}
420+
segment_ranges = *segments;
354421
}
355422

356423
// Parse the name section.
@@ -362,7 +429,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
362429
}
363430

364431
llvm::Expected<std::vector<Symbol>> symbols =
365-
ParseNames(name_section_sp, *functions);
432+
ParseNames(name_section_sp, function_ranges, segment_ranges);
366433
if (!symbols) {
367434
LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
368435
return;
@@ -408,6 +475,9 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
408475
// For this reason Section::GetFileAddress() must return zero for the
409476
// Code section.
410477
vm_addr = 0;
478+
} else if (llvm::wasm::WASM_SEC_DATA == sect_info.id) {
479+
section_type = eSectionTypeData;
480+
section_name = ConstString("data");
411481
} else {
412482
section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
413483
if (section_type == eSectionTypeOther)

lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml

Lines changed: 63 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
# clang -target wasm32 -nostdlib -Wl,--no-entry -Wl,--export-all -O0 -g -o simple.wasm simple.c
2+
# char* str = "data str";
3+
#
4+
# int add(int a, int b) {
5+
# return a + b;
6+
# }
7+
#
8+
# int main() {
9+
# int i = 1;
10+
# int j = 2;
11+
# return add(i, j);
12+
# }
113
--- !WASM
214
FileHeader:
315
Version: 0x1
@@ -37,13 +49,13 @@ Sections:
3749
Mutable: true
3850
InitExpr:
3951
Opcode: I32_CONST
40-
Value: 66560
52+
Value: 66576
4153
- Index: 1
4254
Type: I32
4355
Mutable: false
4456
InitExpr:
4557
Opcode: I32_CONST
46-
Value: 1024
58+
Value: 1036
4759
- Index: 2
4860
Type: I32
4961
Mutable: false
@@ -55,44 +67,50 @@ Sections:
5567
Mutable: false
5668
InitExpr:
5769
Opcode: I32_CONST
58-
Value: 1024
70+
Value: 1040
5971
- Index: 4
6072
Type: I32
6173
Mutable: false
6274
InitExpr:
6375
Opcode: I32_CONST
64-
Value: 66560
76+
Value: 1040
6577
- Index: 5
6678
Type: I32
6779
Mutable: false
6880
InitExpr:
6981
Opcode: I32_CONST
70-
Value: 1024
82+
Value: 66576
7183
- Index: 6
7284
Type: I32
7385
Mutable: false
7486
InitExpr:
7587
Opcode: I32_CONST
76-
Value: 66560
88+
Value: 1024
7789
- Index: 7
7890
Type: I32
7991
Mutable: false
8092
InitExpr:
8193
Opcode: I32_CONST
82-
Value: 131072
94+
Value: 66576
8395
- Index: 8
8496
Type: I32
8597
Mutable: false
8698
InitExpr:
8799
Opcode: I32_CONST
88-
Value: 0
100+
Value: 131072
89101
- Index: 9
90102
Type: I32
91103
Mutable: false
92104
InitExpr:
93105
Opcode: I32_CONST
94-
Value: 1
106+
Value: 0
95107
- Index: 10
108+
Type: I32
109+
Mutable: false
110+
InitExpr:
111+
Opcode: I32_CONST
112+
Value: 1
113+
- Index: 11
96114
Type: I32
97115
Mutable: false
98116
InitExpr:
@@ -115,6 +133,9 @@ Sections:
115133
- Name: main
116134
Kind: FUNCTION
117135
Index: 3
136+
- Name: str
137+
Kind: GLOBAL
138+
Index: 1
118139
- Name: __main_void
119140
Kind: FUNCTION
120141
Index: 2
@@ -123,34 +144,34 @@ Sections:
123144
Index: 0
124145
- Name: __dso_handle
125146
Kind: GLOBAL
126-
Index: 1
147+
Index: 2
127148
- Name: __data_end
128149
Kind: GLOBAL
129-
Index: 2
150+
Index: 3
130151
- Name: __stack_low
131152
Kind: GLOBAL
132-
Index: 3
153+
Index: 4
133154
- Name: __stack_high
134155
Kind: GLOBAL
135-
Index: 4
156+
Index: 5
136157
- Name: __global_base
137158
Kind: GLOBAL
138-
Index: 5
159+
Index: 6
139160
- Name: __heap_base
140161
Kind: GLOBAL
141-
Index: 6
162+
Index: 7
142163
- Name: __heap_end
143164
Kind: GLOBAL
144-
Index: 7
165+
Index: 8
145166
- Name: __memory_base
146167
Kind: GLOBAL
147-
Index: 8
168+
Index: 9
148169
- Name: __table_base
149170
Kind: GLOBAL
150-
Index: 9
171+
Index: 10
151172
- Name: __wasm_first_page_end
152173
Kind: GLOBAL
153-
Index: 10
174+
Index: 11
154175
- Type: CODE
155176
Functions:
156177
- Index: 0
@@ -169,6 +190,20 @@ Sections:
169190
- Index: 3
170191
Locals: []
171192
Body: 1082808080000F0B
193+
- Type: DATA
194+
Segments:
195+
- SectionOffset: 7
196+
InitFlags: 0
197+
Offset:
198+
Opcode: I32_CONST
199+
Value: 1024
200+
Content: '646174612073747200'
201+
- SectionOffset: 22
202+
InitFlags: 0
203+
Offset:
204+
Opcode: I32_CONST
205+
Value: 1036
206+
Content: '00040000'
172207
- Type: CUSTOM
173208
Name: name
174209
FunctionNames:
@@ -183,8 +218,17 @@ Sections:
183218
GlobalNames:
184219
- Index: 0
185220
Name: __stack_pointer
221+
DataSegmentNames:
222+
- Index: 0
223+
Name: .rodata
224+
- Index: 1
225+
Name: .data
186226
- Type: CUSTOM
227+
HeaderSecSizeEncodingLen: 2
187228
Name: producers
229+
Languages:
230+
- Name: C11
231+
Version: ''
188232
Tools:
189233
- Name: clang
190234
Version: '22.0.0git'
Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
# RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm
22
# RUN: %lldb %t.wasm -o 'image dump symtab'
33

4-
# CHECK: Code 0x0000000000000002 {{.*}} __wasm_call_ctors
5-
# CHECK: Code 0x0000000000000005 {{.*}} add
6-
# CHECK: Code 0x000000000000002f {{.*}} __original_main
7-
# CHECK: Code 0x000000000000007c {{.*}} main
4+
# CHECK: Code 0x0000000000000002 0x0000000000000002 {{.*}} __wasm_call_ctors
5+
# CHECK: Code 0x0000000000000005 0x0000000000000029 {{.*}} add
6+
# CHECK: Code 0x000000000000002f 0x000000000000004c {{.*}} __original_main
7+
# CHECK: Code 0x000000000000007c 0x0000000000000009 {{.*}} main
8+
# CHECK: Data 0x000000000000022f 0x0000000000000041 {{.*}} .rodata
9+
# CHECK: Data 0x0000000000000270 0x0000000000000000 {{.*}} .data

0 commit comments

Comments
 (0)