[lldb] Create sections for Wasm segments #153634

JDevlieghere · 2025-08-14T17:55:08Z

This is a continuation of #153494. In a WebAssembly file, the "name" section contains names for the segments in the data section (WASM_NAMES_DATA_SEGMENT). We already parse these as symbols, and with this PR, we now also create sub-sections for each of the segments.

This is a continuation of llvm#153494. In a WebAssembly file, the "name" section contains names for the segments in the data section (WASM_NAMES_DATA_SEGMENT). We already parse these as symbols, and with this PR, we also create sub-sections for the data segments.

llvmbot · 2025-08-14T17:55:41Z

@llvm/pr-subscribers-lldb

Author: Jonas Devlieghere (JDevlieghere)

Changes

This is a continuation of #153494. In a WebAssembly file, the "name" section contains names for the segments in the data section (WASM_NAMES_DATA_SEGMENT). We already parse these as symbols, and with this PR, we now also create sub-sections for each of the segments.

Full diff: https://github.com/llvm/llvm-project/pull/153634.diff

2 Files Affected:

(modified) lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp (+51-19)
(modified) lldb/test/Shell/Symtab/symtab-wasm.test (+13-7)

diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index b3144f28f4913..dc0b0241d1f24 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -281,7 +281,16 @@ ParseFunctions(SectionSP code_section_sp) {
   return functions;
 }
 
-static llvm::Expected<std::vector<AddressRange>>
+struct WasmSegment {
+  WasmSegment(SectionSP section_sp, lldb::offset_t offset, uint32_t size,
+              uint32_t flags)
+      : address_range(section_sp, offset, size), flags(flags) {};
+  std::string name;
+  AddressRange address_range;
+  uint32_t flags = 0;
+};
+
+static llvm::Expected<std::vector<WasmSegment>>
 ParseData(SectionSP data_section_sp) {
   DataExtractor data;
   data_section_sp->GetSectionData(data);
@@ -292,7 +301,7 @@ ParseData(SectionSP data_section_sp) {
   if (segment_count > std::numeric_limits<uint32_t>::max())
     return llvm::createStringError("segment count overflows uint32_t");
 
-  std::vector<AddressRange> segments;
+  std::vector<WasmSegment> segments;
   segments.reserve(segment_count);
 
   for (uint32_t i = 0; i < segment_count; ++i) {
@@ -304,7 +313,7 @@ ParseData(SectionSP data_section_sp) {
     if (flags > std::numeric_limits<uint32_t>::max())
       return llvm::createStringError("segment size overflows uint32_t");
 
-    segments.emplace_back(data_section_sp, offset, segment_size);
+    segments.emplace_back(data_section_sp, offset, segment_size, flags);
 
     std::optional<lldb::offset_t> next_offset =
         llvm::checkedAddUnsigned(offset, segment_size);
@@ -319,7 +328,7 @@ ParseData(SectionSP data_section_sp) {
 static llvm::Expected<std::vector<Symbol>>
 ParseNames(SectionSP name_section_sp,
            const std::vector<AddressRange> &function_ranges,
-           const std::vector<AddressRange> &segment_ranges) {
+           std::vector<WasmSegment> &segments) {
   DataExtractor name_section_data;
   name_section_sp->GetSectionData(name_section_data);
 
@@ -358,12 +367,14 @@ ParseNames(SectionSP name_section_sp,
       for (uint64_t i = 0; c && i < count; ++i) {
         const uint64_t idx = data.getULEB128(c);
         const std::optional<std::string> name = GetWasmString(data, c);
-        if (!name || idx >= segment_ranges.size())
+        if (!name || idx >= segments.size())
           continue;
+        // Update the segment name.
+        segments[i].name = *name;
         symbols.emplace_back(
             symbols.size(), Mangled(*name), lldb::eSymbolTypeData,
             /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
-            /*is_artificial=*/false, segment_ranges[idx],
+            /*is_artificial=*/false, segments[i].address_range,
             /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
             /*flags=*/0);
       }
@@ -391,33 +402,34 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
 
   // The name section contains names and indexes. First parse the data from the
   // relevant sections so we can access it by its index.
-  std::vector<AddressRange> function_ranges;
-  std::vector<AddressRange> segment_ranges;
+  std::vector<AddressRange> functions;
+  std::vector<WasmSegment> segments;
 
   // Parse the code section.
   if (SectionSP code_section_sp =
           m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) {
-    llvm::Expected<std::vector<AddressRange>> functions =
+    llvm::Expected<std::vector<AddressRange>> maybe_functions =
         ParseFunctions(code_section_sp);
-    if (!functions) {
-      LLDB_LOG_ERROR(log, functions.takeError(),
+    if (!maybe_functions) {
+      LLDB_LOG_ERROR(log, maybe_functions.takeError(),
                      "Failed to parse Wasm code section: {0}");
       return;
     }
-    function_ranges = *functions;
+    functions = *maybe_functions;
   }
 
   // Parse the data section.
-  if (SectionSP data_section_sp =
-          m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) {
-    llvm::Expected<std::vector<AddressRange>> segments =
+  SectionSP data_section_sp =
+      m_sections_up->FindSectionByType(lldb::eSectionTypeData, false);
+  if (data_section_sp) {
+    llvm::Expected<std::vector<WasmSegment>> maybe_segments =
         ParseData(data_section_sp);
-    if (!segments) {
-      LLDB_LOG_ERROR(log, segments.takeError(),
+    if (!maybe_segments) {
+      LLDB_LOG_ERROR(log, maybe_segments.takeError(),
                      "Failed to parse Wasm data section: {0}");
       return;
     }
-    segment_ranges = *segments;
+    segments = *maybe_segments;
   }
 
   // Parse the name section.
@@ -429,7 +441,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
   }
 
   llvm::Expected<std::vector<Symbol>> symbols =
-      ParseNames(name_section_sp, function_ranges, segment_ranges);
+      ParseNames(name_section_sp, functions, segments);
   if (!symbols) {
     LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
     return;
@@ -438,6 +450,26 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
   for (const Symbol &symbol : *symbols)
     symtab.AddSymbol(symbol);
 
+  lldb::user_id_t segment_id = 0;
+  for (const WasmSegment &segment : segments) {
+    const lldb::addr_t segment_addr =
+        segment.address_range.GetBaseAddress().GetFileAddress();
+    const size_t segment_size = segment.address_range.GetByteSize();
+    SectionSP segment_sp = std::make_shared<Section>(
+        /*parent_section_sp=*/data_section_sp, GetModule(),
+        /*obj_file=*/this,
+        ++segment_id << 8, // 1-based segment index, shifted by 8 bits to avoid
+                           // collision with section IDs.
+        ConstString(segment.name), eSectionTypeData,
+        /*file_vm_addr=*/segment_addr,
+        /*vm_size=*/segment_size,
+        /*file_offset=*/segment_addr,
+        /*file_size=*/segment_size,
+        /*log2align=*/0, segment.flags);
+    m_sections_up->AddSection(segment_sp);
+    GetModule()->GetSectionList()->AddSection(segment_sp);
+  }
+
   symtab.Finalize();
 }
 
diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test
index 5374b0c2f2892..5e7c7cabc5280 100644
--- a/lldb/test/Shell/Symtab/symtab-wasm.test
+++ b/lldb/test/Shell/Symtab/symtab-wasm.test
@@ -1,9 +1,15 @@
 # RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm
-# RUN: %lldb %t.wasm -o 'image dump symtab'
+# RUN: %lldb %t.wasm -o 'image dump symtab' -o 'image dump sections' | FileCheck %s
 
-# CHECK: Code 0x0000000000000002 0x0000000000000002 {{.*}} __wasm_call_ctors
-# CHECK: Code 0x0000000000000005 0x0000000000000029 {{.*}} add
-# CHECK: Code 0x000000000000002f 0x000000000000004c {{.*}} __original_main
-# CHECK: Code 0x000000000000007c 0x0000000000000009 {{.*}} main
-# CHECK: Data 0x000000000000022f 0x0000000000000041 {{.*}} .rodata
-# CHECK: Data 0x0000000000000270 0x0000000000000000 {{.*}} .data
+CHECK: Code 0x0000000000000002 0x0000000000000002 0x00000000 __wasm_call_ctors
+CHECK: Code 0x0000000000000005 0x0000000000000029 0x00000000 add
+CHECK: Code 0x000000000000002f 0x000000000000004c 0x00000000 __original_main
+CHECK: Code 0x000000000000007c 0x0000000000000009 0x00000000 main
+CHECK: Data 0x000000000000022f 0x0000000000000041 0x00000000 .rodata
+CHECK: Data 0x0000000000000270 0x0000000000000000 0x00000000 .data
+
+CHECK: 0x0000000000000001 code       {{.*}} 0x000001a1 0x00000085 0x00000000 symtab-wasm.test.tmp.wasm.code
+CHECK: 0x0000000000000003 data       {{.*}} 0x0000022c 0x0000001a 0x00000000 symtab-wasm.test.tmp.wasm.data
+CHECK: 0x0000000000000040 wasm-name  {{.*}} 0x00000251 0x00000059 0x00000000 symtab-wasm.test.tmp.wasm.name
+CHECK: 0x0000000000000100 data       {{.*}} 0x0000022f 0x00000041 0x00000000 symtab-wasm.test.tmp.wasm.data..rodata
+CHECK: 0x0000000000000200 data       {{.*}} 0x00000270 0x00000000 0x00000000 symtab-wasm.test.tmp.wasm.data..data

DavidSpickett

Don't have much expertise here, defer to Adrian to approve.

DavidSpickett · 2025-08-15T08:02:54Z

lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp

@@ -304,7 +313,7 @@ ParseData(SectionSP data_section_sp) {
    if (flags > std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("segment size overflows uint32_t");


This error should say flags, right?

Actually, no, the error message is correct, but the check above is not (it should check segment_size).

adrian-prantl · 2025-08-18T21:25:16Z

lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp

+      // Skip over the constant expression.
+      for (uint8_t b = 0; b != llvm::wasm::WASM_OPCODE_END;)
+        b = data.GetU8(&offset);
+    }


This looks correct.

FWIW, there might be a place for a wrapper function (or macro?) that effectively does a GetULEB32OrError().

Yeah, let me clean that up in a follow-up for the whole file.

This is a continuation of llvm#153494. In a WebAssembly file, the "name" section contains names for the segments in the data section (WASM_NAMES_DATA_SEGMENT). We already parse these as symbols, and with this PR, we now also create sub-sections for each of the segments. (cherry picked from commit 8bd9897)

JDevlieghere requested review from DavidSpickett and adrian-prantl August 14, 2025 17:55

llvmbot added the lldb label Aug 14, 2025

JDevlieghere changed the title ~~[lldb] Create sections fro Wasm segments~~ [lldb] Create sections for Wasm segments Aug 14, 2025

DavidSpickett reviewed Aug 15, 2025

View reviewed changes

JDevlieghere added 2 commits August 17, 2025 10:48

Check segment_size, not flags

83cd4e1

Support 'active' data segments

83ae0f6

adrian-prantl reviewed Aug 18, 2025

View reviewed changes

adrian-prantl approved these changes Aug 18, 2025

View reviewed changes

JDevlieghere merged commit 8bd9897 into llvm:main Aug 19, 2025
9 checks passed

JDevlieghere deleted the wasm-data-segments branch August 19, 2025 18:08

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[lldb] Create sections for Wasm segments #153634

[lldb] Create sections for Wasm segments #153634

Uh oh!

JDevlieghere commented Aug 14, 2025

Uh oh!

llvmbot commented Aug 14, 2025

Uh oh!

DavidSpickett left a comment

Uh oh!

DavidSpickett Aug 15, 2025

Uh oh!

JDevlieghere Aug 17, 2025

Uh oh!

adrian-prantl Aug 18, 2025

Uh oh!

adrian-prantl Aug 18, 2025

Uh oh!

JDevlieghere Aug 19, 2025

Uh oh!

Uh oh!

Uh oh!

		@@ -304,7 +313,7 @@ ParseData(SectionSP data_section_sp) {
		if (flags > std::numeric_limits<uint32_t>::max())
		return llvm::createStringError("segment size overflows uint32_t");

[lldb] Create sections for Wasm segments #153634

[lldb] Create sections for Wasm segments #153634

Uh oh!

Conversation

JDevlieghere commented Aug 14, 2025

Uh oh!

llvmbot commented Aug 14, 2025

Uh oh!

DavidSpickett left a comment

Choose a reason for hiding this comment

Uh oh!

DavidSpickett Aug 15, 2025

Choose a reason for hiding this comment

Uh oh!

JDevlieghere Aug 17, 2025

Choose a reason for hiding this comment

Uh oh!

adrian-prantl Aug 18, 2025

Choose a reason for hiding this comment

Uh oh!

adrian-prantl Aug 18, 2025

Choose a reason for hiding this comment

Uh oh!

JDevlieghere Aug 19, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!