From bd8a1e123b468c5e9f2251d2a7f55d9c22069396 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 3 Oct 2025 20:05:11 -0700 Subject: [PATCH 1/4] [lldb] Add type hints to gdbclientutils.py and fix issues Everything in this should be Python 3.9. The docs say the minimum is 3.8 but there's existing code in this suite that needs 3.9 so I think 3.9 is ok. Issues: qEcho() is passed an argument by the callers that the function didn't have. Several functions in the base class would silently do nothing if not overridden. These now use @abstractmethod to require overrides. sendall() had inconsistent return types between overrides. --- .../Python/lldbsuite/test/gdbclientutils.py | 70 ++++++++++++------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py index b603c35c8df09..5fe1bc3155386 100644 --- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py @@ -1,3 +1,4 @@ +from abc import ABC, abstractmethod import ctypes import errno import io @@ -5,6 +6,7 @@ import socket import traceback from lldbsuite.support import seven +from typing import Optional def checksum(message): @@ -86,8 +88,8 @@ class MockGDBServerResponder: handles any packet not recognized in the common packet handling code. """ - registerCount = 40 - packetLog = None + registerCount: int = 40 + packetLog: Optional[list[str]] = None class RESPONSE_DISCONNECT: pass @@ -103,6 +105,7 @@ def respond(self, packet): Return the unframed packet data that the server should issue in response to the given packet received from the client.
""" + assert self.packetLog is not None self.packetLog.append(packet) if packet is MockGDBServer.PACKET_INTERRUPT: return self.interrupt() @@ -242,7 +245,7 @@ def qProcessInfo(self): def qHostInfo(self): return "ptrsize:8;endian:little;" - def qEcho(self): + def qEcho(self, _: int): return "E04" def qQueryGDBServer(self): @@ -263,10 +266,10 @@ def A(self, packet): def D(self, packet): return "OK" - def readRegisters(self): + def readRegisters(self) -> str: return "00000000" * self.registerCount - def readRegister(self, register): + def readRegister(self, register: int) -> str: return "00000000" def writeRegisters(self, registers_hex): @@ -306,7 +309,8 @@ def haltReason(self): # SIGINT is 2, return type is 2 digit hex string return "S02" - def qXferRead(self, obj, annex, offset, length): + def qXferRead(self, obj: str, annex: str, offset: int, + length: int) -> tuple[str | None, bool]: return None, False def _qXferResponse(self, data, has_more): @@ -374,15 +378,17 @@ class UnexpectedPacketException(Exception): pass -class ServerChannel: +class ServerChannel(ABC): """ A wrapper class for TCP or pty-based server. 
""" - def get_connect_address(self): + @abstractmethod + def get_connect_address(self) -> str: """Get address for the client to connect to.""" - def get_connect_url(self): + @abstractmethod + def get_connect_url(self) -> str: """Get URL suitable for process connect command.""" def close_server(self): @@ -394,10 +400,12 @@ def accept(self): def close_connection(self): """Close all resources used by the accepted connection.""" - def recv(self): + @abstractmethod + def recv(self) -> bytes: """Receive a data packet from the connected client.""" - def sendall(self, data): + @abstractmethod + def sendall(self, data: bytes) -> None: """Send the data to the connected client.""" @@ -428,11 +436,11 @@ def close_connection(self): self._connection.close() self._connection = None - def recv(self): + def recv(self) -> bytes: assert self._connection is not None return self._connection.recv(4096) - def sendall(self, data): + def sendall(self, data: bytes) -> None: assert self._connection is not None return self._connection.sendall(data) @@ -444,10 +452,10 @@ def __init__(self): )[0] super().__init__(family, type, proto, addr) - def get_connect_address(self): + def get_connect_address(self) -> str: return "[{}]:{}".format(*self._server_socket.getsockname()) - def get_connect_url(self): + def get_connect_url(self) -> str: return "connect://" + self.get_connect_address() @@ -455,10 +463,10 @@ class UnixServerSocket(ServerSocket): def __init__(self, addr): super().__init__(socket.AF_UNIX, socket.SOCK_STREAM, 0, addr) - def get_connect_address(self): + def get_connect_address(self) -> str: return self._server_socket.getsockname() - def get_connect_url(self): + def get_connect_url(self) -> str: return "unix-connect://" + self.get_connect_address() @@ -472,7 +480,7 @@ def __init__(self): self._primary = io.FileIO(primary, "r+b") self._secondary = io.FileIO(secondary, "r+b") - def get_connect_address(self): + def get_connect_address(self) -> str: libc = ctypes.CDLL(None) 
libc.ptsname.argtypes = (ctypes.c_int,) libc.ptsname.restype = ctypes.c_char_p @@ -485,7 +493,7 @@ def close_server(self): self._secondary.close() self._primary.close() - def recv(self): + def recv(self) -> bytes: try: return self._primary.read(4096) except OSError as e: @@ -494,8 +502,8 @@ def recv(self): return b"" raise - def sendall(self, data): - return self._primary.write(data) + def sendall(self, data: bytes) -> None: + self._primary.write(data) class MockGDBServer: @@ -528,18 +536,21 @@ def stop(self): self._thread.join() self._thread = None - def get_connect_address(self): + def get_connect_address(self) -> str: + assert self._socket is not None return self._socket.get_connect_address() - def get_connect_url(self): + def get_connect_url(self) -> str: + assert self._socket is not None return self._socket.get_connect_url() def run(self): + assert self._socket is not None # For testing purposes, we only need to worry about one client # connecting just one time. try: self._socket.accept() - except: + except Exception: traceback.print_exc() return self._shouldSendAck = True @@ -554,7 +565,7 @@ def run(self): self._receive(data) except self.TerminateConnectionException: pass - except Exception as e: + except Exception: print( "An exception happened when receiving the response from the gdb server. Closing the client..." ) @@ -587,7 +598,9 @@ def _parsePacket(self): Once a complete packet is found at the front of self._receivedData, its data is removed form self._receivedData. 
""" + assert self._receivedData is not None data = self._receivedData + assert self._receivedDataOffset is not None i = self._receivedDataOffset data_len = len(data) if data_len == 0: @@ -640,10 +653,13 @@ def _parsePacket(self): self._receivedDataOffset = 0 return packet - def _sendPacket(self, packet): - self._socket.sendall(seven.bitcast_to_bytes(frame_packet(packet))) + def _sendPacket(self, packet: str): + assert self._socket is not None + framed_packet = seven.bitcast_to_bytes(frame_packet(packet)) + self._socket.sendall(framed_packet) def _handlePacket(self, packet): + assert self._socket is not None if packet is self.PACKET_ACK: # Ignore ACKs from the client. For the future, we can consider # adding validation code to make sure the client only sends ACKs From 6d8da22b7911ff4d8114de619acce98e3fe9e815 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 3 Oct 2025 20:09:50 -0700 Subject: [PATCH 2/4] [lldb] Add bidirectional packetLog to gdbclientutils.py While debugging the tests for PR155000 I found it helpful to have both sides of the simulated gdb-rsp traffic rather than just the responses so I've added a packetLog to MockGDBServer. 
The existing response-only one in MockGDBServerResponder is used by tests so I chose not to change it --- lldb/packages/Python/lldbsuite/test/gdbclientutils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py index 5fe1bc3155386..0395dcc5246f1 100644 --- a/lldb/packages/Python/lldbsuite/test/gdbclientutils.py +++ b/lldb/packages/Python/lldbsuite/test/gdbclientutils.py @@ -6,7 +6,7 @@ import socket import traceback from lldbsuite.support import seven -from typing import Optional +from typing import Optional, Literal def checksum(message): @@ -521,10 +521,12 @@ class MockGDBServer: _receivedData = None _receivedDataOffset = None _shouldSendAck = True + packetLog: list[tuple[Literal["recv"] | Literal["send"], str | bytes]] def __init__(self, socket): self._socket = socket self.responder = MockGDBServerResponder() + self.packetLog = [] def start(self): # Start a thread that waits for a client connection. @@ -651,11 +653,13 @@ def _parsePacket(self): # can start on the next packet the next time around self._receivedData = data[i:] self._receivedDataOffset = 0 + self.packetLog.append(("recv", packet)) return packet def _sendPacket(self, packet: str): assert self._socket is not None framed_packet = seven.bitcast_to_bytes(frame_packet(packet)) + self.packetLog.append(("send", framed_packet)) self._socket.sendall(framed_packet) def _handlePacket(self, packet): From abbfd8f845af06007b7c816a5efe51c9311c324d Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 5 Sep 2025 14:46:22 -0700 Subject: [PATCH 3/4] [lldb] Architecture plugins should report the vector element order. NFC Some targets like PowerPC store their - PowerPC little endian: little endian elements ordered 0, 1, 2, ... - PowerPC big endian: big endian elements ordered n-1, n-2, n-3, ... - ARM/MIPS little endian: little endian elements ordered 0, 1, 2, ... 
- ARM/MIPS big endian: big endian elements ordered 0, 1, 2, ... This matters when LLVM-IR values are transferred to/from target memory since LLVM-IR orders elements 0, 1, 2, ... regardless of endianness. This will be used in #155000 by changes to the IRInterpreter to allow it to evaluate some vectors without executing on the target --- lldb/include/lldb/Core/Architecture.h | 11 +++++++++++ .../Plugins/Architecture/PPC64/ArchitecturePPC64.cpp | 7 ++++++- .../Plugins/Architecture/PPC64/ArchitecturePPC64.h | 7 ++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/lldb/include/lldb/Core/Architecture.h b/lldb/include/lldb/Core/Architecture.h index ed64a895717a1..f8e9c43cd418d 100644 --- a/lldb/include/lldb/Core/Architecture.h +++ b/lldb/include/lldb/Core/Architecture.h @@ -138,6 +138,17 @@ class Architecture : public PluginInterface { std::shared_ptr current_unwindplan) { return lldb::UnwindPlanSP(); } + + /// Get the vector element order for this architecture. This determines how + /// vector elements are indexed. This matters in a few places such as reading/ + /// writing LLVM-IR values to/from target memory. Some architectures use + /// little-endian element ordering where element 0 is at the lowest address + /// even when the architecture is otherwise big-endian (e.g. MIPS MSA, ARM + /// NEON), but some architectures like PowerPC may use big-endian element + /// ordering where element 0 is at the highest address. 
+ virtual lldb::ByteOrder GetVectorElementOrder() const { + return lldb::eByteOrderLittle; + } }; } // namespace lldb_private diff --git a/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.cpp b/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.cpp index b8fac55e41da7..a4690cc561a28 100644 --- a/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.cpp +++ b/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.cpp @@ -35,7 +35,8 @@ void ArchitecturePPC64::Terminate() { std::unique_ptr ArchitecturePPC64::Create(const ArchSpec &arch) { if (arch.GetTriple().isPPC64() && arch.GetTriple().getObjectFormat() == llvm::Triple::ObjectFormatType::ELF) - return std::unique_ptr(new ArchitecturePPC64()); + return std::unique_ptr( + new ArchitecturePPC64(arch.GetByteOrder())); return nullptr; } @@ -60,3 +61,7 @@ void ArchitecturePPC64::AdjustBreakpointAddress(const Symbol &func, addr.SetOffset(addr.GetOffset() + loffs); } + +lldb::ByteOrder ArchitecturePPC64::GetVectorElementOrder() const { + return m_vector_element_order; +} diff --git a/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.h b/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.h index 80f7f27b54cce..9a0edf371d539 100644 --- a/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.h +++ b/lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.h @@ -30,9 +30,14 @@ class ArchitecturePPC64 : public Architecture { void AdjustBreakpointAddress(const Symbol &func, Address &addr) const override; + lldb::ByteOrder GetVectorElementOrder() const override; + private: static std::unique_ptr Create(const ArchSpec &arch); - ArchitecturePPC64() = default; + ArchitecturePPC64(lldb::ByteOrder vector_element_order) + : m_vector_element_order(vector_element_order) {} + + lldb::ByteOrder m_vector_element_order; }; } // namespace lldb_private From 9b846c01a6e12efaabb6bbcf97cc7361e6283067 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 22 Aug 2025 10:54:52 -0700 Subject: [PATCH 4/4] [lldb] 
Add some vector operations to the IRInterpreter This allows the debugger to evaluate expressions without requiring the expression to be CodeGen'd and executed on the target. This should be more efficient for many existing targets but is necessary for targets which are not yet able to evaluate on the target. In terms of memory layout, we have: | Element Values | Element Order | ARM NEON | Endian-dependent | Zero-first | MIPS MSA | Endian-dependent | Zero-first | PowerPC | Endian-dependent | Zero-first | SystemZ | Endian-dependent | Zero-first | Where Zero-first means that element zero of an array/vector is at the lowest memory address. In terms of register layout, we have: | Element Values | Element Order | Effective Element Order ARM NEON | Endian-dependent | Zero-first | Zero-first MIPS MSA | Endian-dependent | Zero-first | Zero-first PowerPC | Endian-dependent | Endian-dependent* | Zero-first (lane-swaps LE) SystemZ | Endian-dependent | Endian-dependent* | Zero-first (lane-swaps BE) *HW is endian-dependent but CodeGen accounts for it. PowerPC is a little more complicated than shown above as it actually supports two modes, True-LE and Big-on-Little, and the above table shows True-LE's behaviour. See https://llvm.org/devmtg/2014-10/Slides/Schmidt-SupportingVectorProgramming.pdf I haven't seen evidence that Big-on-Little is implemented yet so I haven't attempted to account for it. The end result of this is that transferring values between llvm-ir and memory is consistent between the four targets but transferring values between llvm-ir and registers potentially requires transformations. I've therefore: * Redefined GetVectorElementOrder to refer to the register layout, not memory. * Made Materializer/Dematerializer bail out when reading/writing vector values. * Made bitcast bail out for the cases where a bitcast is a shuffle rather than a nop.
--- lldb/include/lldb/Core/Architecture.h | 6 +- lldb/include/lldb/Expression/IRInterpreter.h | 11 +- lldb/source/Expression/IRInterpreter.cpp | 250 +++++++++++++- lldb/source/Expression/Materializer.cpp | 36 ++ .../Clang/ClangExpressionParser.cpp | 2 +- .../TestRegisterExpressionEndian.py | 18 +- .../TestExprVectorElementOrder.py | 313 ++++++++++++++++++ .../vector-types/TestVectorTypesFormatting.py | 41 ++- 8 files changed, 650 insertions(+), 27 deletions(-) create mode 100644 lldb/test/API/commands/expression/expr-vec-elt-order/TestExprVectorElementOrder.py diff --git a/lldb/include/lldb/Core/Architecture.h b/lldb/include/lldb/Core/Architecture.h index f8e9c43cd418d..32f0467203fac 100644 --- a/lldb/include/lldb/Core/Architecture.h +++ b/lldb/include/lldb/Core/Architecture.h @@ -141,11 +141,11 @@ class Architecture : public PluginInterface { /// Get the vector element order for this architecture. This determines how /// vector elements are indexed. This matters in a few places such as reading/ - /// writing LLVM-IR values to/from target memory. Some architectures use - /// little-endian element ordering where element 0 is at the lowest address + /// writing LLVM-IR values to/from target registers. Some architectures use + /// little-endian element ordering where element 0 is at the lowest bits /// even when the architecture is otherwise big-endian (e.g. MIPS MSA, ARM /// NEON), but some architectures like PowerPC may use big-endian element - /// ordering where element 0 is at the highest address. + /// ordering where element 0 is at the highest bits. 
virtual lldb::ByteOrder GetVectorElementOrder() const { return lldb::eByteOrderLittle; } diff --git a/lldb/include/lldb/Expression/IRInterpreter.h b/lldb/include/lldb/Expression/IRInterpreter.h index 9106f1b4d1c3d..350a6e7f2b987 100644 --- a/lldb/include/lldb/Expression/IRInterpreter.h +++ b/lldb/include/lldb/Expression/IRInterpreter.h @@ -17,8 +17,10 @@ #include "llvm/Pass.h" namespace llvm { +class DataLayout; class Function; class Module; +class Instruction; } namespace lldb_private { @@ -37,7 +39,8 @@ class IRInterpreter { public: static bool CanInterpret(llvm::Module &module, llvm::Function &function, lldb_private::Status &error, - const bool support_function_calls); + const bool support_function_calls, + lldb_private::ExecutionContext &exe_ctx); static bool Interpret(llvm::Module &module, llvm::Function &function, llvm::ArrayRef args, @@ -51,6 +54,12 @@ class IRInterpreter { private: static bool supportsFunction(llvm::Function &llvm_function, lldb_private::Status &err); + + static bool InterpretExtractElement( + const llvm::Instruction *inst, class InterpreterStackFrame &frame, + const llvm::DataLayout &data_layout, llvm::Module &module, + lldb_private::IRExecutionUnit &execution_unit, + lldb_private::Status &error, lldb_private::Log *log); }; #endif diff --git a/lldb/source/Expression/IRInterpreter.cpp b/lldb/source/Expression/IRInterpreter.cpp index 91404831aeb9b..86c5ce3c6fd4f 100644 --- a/lldb/source/Expression/IRInterpreter.cpp +++ b/lldb/source/Expression/IRInterpreter.cpp @@ -70,6 +70,75 @@ static std::string PrintType(const Type *type, bool truncate = false) { return s; } +static bool +isNonTrivialBitcast(const Instruction &inst, + lldb_private::ExecutionContext &exe_ctx) { + auto *result_type = dyn_cast(inst.getType()); + auto *operand = inst.getOperand(0); + auto *operand_type = dyn_cast(operand->getType()); + + // If neither type is a vector then the bitcast is trivial + if (!result_type && !operand_type) + return false; + + // Get endianness and 
element order from the architecture + lldb::ByteOrder byte_order = lldb::eByteOrderLittle; + lldb::ByteOrder element_order = lldb::eByteOrderLittle; + + lldb::TargetSP target_sp = exe_ctx.GetTargetSP(); + if (target_sp) { + const auto *arch_plugin = target_sp->GetArchitecturePlugin(); + if (arch_plugin) { + byte_order = target_sp->GetArchitecture().GetByteOrder(); + element_order = arch_plugin->GetVectorElementOrder(); + } + } + + // Bitcast is trivial if endianness matches element order + if (byte_order == element_order) + return false; + + // If the element order and value byte order disagree then vector bitcasts + // aren't no-ops when the element sizes change. For example given: + // <2 x i32> + // then bitcast's LangRef definition of + // store <2 x i32>, ptr @loc + // load <4 x i16>, ptr @loc + // gives: + // Order | Mem after Store | <4 x i16> after Load | Bitcast + // LF | 3322110077665544 | 0x3322 0x0011 0x6677 0x4455 | no-op + // BN | 0011223344556677 | 0x0011 0x2233 0x4455 0x6677 | shuffle + // LF | 7766554433221100 | 0x6677 0x4455 0x3322 0x1100 | shuffle + // BN | 4455667700112233 | 0x4455 0x6677 0x0011 0x2233 | no-op + // Order abbreviations: + // L = Little Endian + // B = Big Endian + // F = Lane 0 is first + // N = Lane N-1 is first + + // If only one type is a vector, then we'll assume it's non-trivial on the + // basis that this changes the number of elements from N to 1 or the other + // way around. + if (!result_type || !operand_type) + return false; + + // I'm not sure how scalable vectors behave in this situation. + // Reject them to be safe. + if (!result_type->getElementCount().isFixed() || + !operand_type->getElementCount().isFixed()) + return true; + + // We can handle the cases that are no-op by virtue of the element + // sizes/counts not changing but the shuffle cases aren't + // implemented in IRInterpreter::Interpret so decline to interpret + // them. 
+ if (result_type->getElementCount() != operand_type->getElementCount() || + result_type->getScalarSizeInBits() != operand_type->getScalarSizeInBits()) + return true; + + return false; +} + static bool CanIgnoreCall(const CallInst *call) { const llvm::Function *called_function = call->getCalledFunction(); @@ -367,7 +436,62 @@ class InterpreterStackFrame { return true; } + bool ResolveVectorConstant(lldb::addr_t process_address, + const Constant *constant) { + auto *vector_type = dyn_cast(constant->getType()); + if (!vector_type) + return false; + + Type *element_type = vector_type->getElementType(); + unsigned num_elements = vector_type->getNumElements(); + size_t element_size = m_target_data.getTypeStoreSize(element_type); + size_t total_size = element_size * num_elements; + + lldb_private::DataBufferHeap buf(total_size, 0); + uint8_t *data_ptr = buf.GetBytes(); + + if (isa(constant)) { + // Zero initializer - buffer is already zeroed, just write it + lldb_private::Status write_error; + m_execution_unit.WriteMemory(process_address, buf.GetBytes(), + buf.GetByteSize(), write_error); + return write_error.Success(); + } + + if (const ConstantDataVector *cdv = + dyn_cast(constant)) { + for (unsigned i = 0; i < num_elements; ++i) { + const Constant *element = cdv->getElementAsConstant(i); + APInt element_value; + if (!ResolveConstantValue(element_value, element)) + return false; + + size_t offset = i * element_size; + + lldb_private::Scalar element_scalar( + element_value.zextOrTrunc(element_size * 8)); + lldb_private::Status get_data_error; + if (!element_scalar.GetAsMemoryData(data_ptr + offset, element_size, + m_byte_order, get_data_error)) + return false; + } + lldb_private::Status write_error; + m_execution_unit.WriteMemory(process_address, buf.GetBytes(), + buf.GetByteSize(), write_error); + + return write_error.Success(); + } + + return false; + } + bool ResolveConstant(lldb::addr_t process_address, const Constant *constant) { + // Handle vector constants 
specially since they can't be represented as a + // single APInt + if (constant->getType()->isVectorTy()) { + return ResolveVectorConstant(process_address, constant); + } + APInt resolved_value; if (!ResolveConstantValue(resolved_value, constant)) @@ -484,8 +608,12 @@ static bool CanResolveConstant(llvm::Constant *constant) { return false; case Value::ConstantIntVal: case Value::ConstantFPVal: + return true; case Value::FunctionVal: return true; + case Value::ConstantDataVectorVal: + case Value::ConstantAggregateZeroVal: + return constant->getType()->getTypeID() == Type::FixedVectorTyID; case Value::ConstantExprVal: if (const ConstantExpr *constant_expr = dyn_cast(constant)) { switch (constant_expr->getOpcode()) { @@ -522,7 +650,8 @@ static bool CanResolveConstant(llvm::Constant *constant) { bool IRInterpreter::CanInterpret(llvm::Module &module, llvm::Function &function, lldb_private::Status &error, - const bool support_function_calls) { + const bool support_function_calls, + lldb_private::ExecutionContext &exe_ctx) { lldb_private::Log *log(GetLog(LLDBLog::Expressions)); bool saw_function_with_body = false; @@ -548,9 +677,13 @@ bool IRInterpreter::CanInterpret(llvm::Module &module, llvm::Function &function, } case Instruction::Add: case Instruction::Alloca: - case Instruction::BitCast: case Instruction::Br: case Instruction::PHI: + case Instruction::ExtractElement: + break; + case Instruction::BitCast: + if (isNonTrivialBitcast(ii, exe_ctx)) + return false; break; case Instruction::Call: { CallInst *call_inst = dyn_cast(&ii); @@ -645,6 +778,7 @@ bool IRInterpreter::CanInterpret(llvm::Module &module, llvm::Function &function, default: break; case Type::FixedVectorTyID: + break; case Type::ScalableVectorTyID: { LLDB_LOGF(log, "Unsupported operand type: %s", PrintType(operand_type).c_str()); @@ -657,8 +791,9 @@ bool IRInterpreter::CanInterpret(llvm::Module &module, llvm::Function &function, // The IR interpreter currently doesn't know about // 128-bit integers. 
As they're not that frequent, // we can just fall back to the JIT rather than - // choking. - if (operand_type->getPrimitiveSizeInBits() > 64) { + // choking. However, allow vectors since we handle them above. + if (operand_type->getPrimitiveSizeInBits() > 64 && + !operand_type->isVectorTy()) { LLDB_LOGF(log, "Unsupported operand type: %s", PrintType(operand_type).c_str()); error = @@ -1543,9 +1678,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function, // Void return type if (returnType->isVoidTy()) { // Cant assign to void types, so we leave the frame untouched - } else - // Integer or pointer return type - if (returnType->isIntegerTy() || returnType->isPointerTy()) { + } else if (returnType->isIntegerTy() || returnType->isPointerTy()) { // Get the encapsulated return value lldb::ValueObjectSP retVal = call_plan_sp.get()->GetReturnValueObject(); @@ -1567,6 +1700,11 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function, frame.AssignValue(inst, returnVal, module); } } break; + case Instruction::ExtractElement: { + if (!InterpretExtractElement(inst, frame, data_layout, module, + execution_unit, error, log)) + return false; + } break; } ++frame.m_ii; @@ -1574,3 +1712,101 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function, return false; } + +bool IRInterpreter::InterpretExtractElement( + const llvm::Instruction *inst, InterpreterStackFrame &frame, + const llvm::DataLayout &data_layout, llvm::Module &module, + lldb_private::IRExecutionUnit &execution_unit, lldb_private::Status &error, + lldb_private::Log *log) { + const ExtractElementInst *extract_inst = cast(inst); + + // Get the vector and index operands + const Value *vector_operand = extract_inst->getVectorOperand(); + const Value *index_operand = extract_inst->getIndexOperand(); + + // Get the vector address + lldb::addr_t vector_addr = + frame.ResolveValue(vector_operand, module); + + if (vector_addr == LLDB_INVALID_ADDRESS) { + 
LLDB_LOGF(log, "ExtractElement's vector doesn't resolve to anything"); + error = lldb_private::Status::FromErrorString(bad_value_error); + return false; + } + + // Evaluate the index + lldb_private::Scalar index_scalar; + if (!frame.EvaluateValue(index_scalar, index_operand, module)) { + LLDB_LOGF(log, "Couldn't evaluate index %s", + PrintValue(index_operand).c_str()); + error = lldb_private::Status::FromErrorString(bad_value_error); + return false; + } + + uint64_t index = index_scalar.ULongLong(); + + // Get the vector type information + auto *vector_type = dyn_cast(vector_operand->getType()); + if (!vector_type) { + LLDB_LOGF(log, "ExtractElement instruction doesn't have a fixed vector " + "operand type"); + error = lldb_private::Status::FromErrorString(interpreter_internal_error); + return false; + } + + unsigned num_elements = vector_type->getNumElements(); + if (index >= num_elements) { + LLDB_LOG(log, + "ExtractElement index {0} is out of bounds for vector with " + "{1} elements", + index, num_elements); + error = lldb_private::Status::FromErrorString(bad_value_error); + return false; + } + + Type *element_type = vector_type->getElementType(); + size_t element_size = data_layout.getTypeStoreSize(element_type); + + size_t element_offset = index * element_size; + + // Allocate space for the result element + lldb::addr_t result_addr = frame.ResolveValue(extract_inst, module); + if (result_addr == LLDB_INVALID_ADDRESS) { + LLDB_LOG(log, "ExtractElement's result doesn't resolve to anything"); + error = lldb_private::Status::FromErrorString(bad_value_error); + return false; + } + + // Read the element from the vector + lldb_private::DataBufferHeap element_buffer(element_size, 0); + lldb_private::Status read_error; + execution_unit.ReadMemory(element_buffer.GetBytes(), + vector_addr + element_offset, element_size, + read_error); + if (!read_error.Success()) { + LLDB_LOG(log, "Couldn't read element data for ExtractElement"); + error = 
lldb_private::Status::FromErrorString(memory_read_error); + return false; + } + + // Write the element to the result location + lldb_private::Status write_error; + execution_unit.WriteMemory(result_addr, element_buffer.GetBytes(), + element_size, write_error); + if (!write_error.Success()) { + LLDB_LOG(log, "Couldn't write result for ExtractElement"); + error = lldb_private::Status::FromErrorString(memory_write_error); + return false; + } + + if (log) { + LLDB_LOG(log, + "Interpreted an ExtractElement\n" + " Vector: {0}\n" + " Index: {1}\n" + " Element offset: {2}\n" + " Result: {3}\n", + vector_addr, index, element_offset, result_addr); + } + return true; +} diff --git a/lldb/source/Expression/Materializer.cpp b/lldb/source/Expression/Materializer.cpp index 771a9ab84a20c..d9e5194a8d94a 100644 --- a/lldb/source/Expression/Materializer.cpp +++ b/lldb/source/Expression/Materializer.cpp @@ -1369,6 +1369,24 @@ class EntityRegister : public Materializer::Entity { return; } + // Check for unsupported vector element ordering + if (m_register_info.encoding == lldb::eEncodingVector) { + ExecutionContext exe_ctx; + frame_sp->CalculateExecutionContext(exe_ctx); + if (exe_ctx.GetTargetPtr()) { + const auto *arch_plugin = + exe_ctx.GetTargetRef().GetArchitecturePlugin(); + if (arch_plugin && + arch_plugin->GetVectorElementOrder() == lldb::eByteOrderBig) { + err = Status::FromErrorStringWithFormat( + "unable to materialize register %s: vector registers with " + "big-endian element ordering are not yet supported", + m_register_info.name); + return; + } + } + } + lldb::RegisterContextSP reg_context_sp = frame_sp->GetRegisterContext(); if (!reg_context_sp->ReadRegister(&m_register_info, reg_value)) { @@ -1431,6 +1449,24 @@ class EntityRegister : public Materializer::Entity { return; } + // Check for unsupported vector element ordering + if (m_register_info.encoding == lldb::eEncodingVector) { + ExecutionContext exe_ctx; + frame_sp->CalculateExecutionContext(exe_ctx); + if 
(exe_ctx.GetTargetPtr()) { + const auto *arch_plugin = + exe_ctx.GetTargetRef().GetArchitecturePlugin(); + if (arch_plugin && + arch_plugin->GetVectorElementOrder() == lldb::eByteOrderBig) { + err = Status::FromErrorStringWithFormat( + "unable to dematerialize register %s: vector registers with " + "big-endian element ordering are not yet supported", + m_register_info.name); + return; + } + } + } + lldb::RegisterContextSP reg_context_sp = frame_sp->GetRegisterContext(); map.GetMemoryData(register_data, load_addr, m_register_info.byte_size, diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index 3c49c911108a3..cc91f09085b76 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -1523,7 +1523,7 @@ lldb_private::Status ClangExpressionParser::DoPrepareForExecution( !process ? false : process->CanInterpretFunctionCalls(); can_interpret = IRInterpreter::CanInterpret( *execution_unit_sp->GetModule(), *execution_unit_sp->GetFunction(), - interpret_error, interpret_function_calls); + interpret_error, interpret_function_calls, exe_ctx); if (!can_interpret && execution_policy == eExecutionPolicyNever) { err = Status::FromErrorStringWithFormat( diff --git a/lldb/test/API/commands/expression/TestRegisterExpressionEndian.py b/lldb/test/API/commands/expression/TestRegisterExpressionEndian.py index d6de8731385b6..16d576fdfb1e6 100644 --- a/lldb/test/API/commands/expression/TestRegisterExpressionEndian.py +++ b/lldb/test/API/commands/expression/TestRegisterExpressionEndian.py @@ -5,9 +5,13 @@ from enum import Enum from textwrap import dedent import lldb -from lldbsuite.test.lldbtest import * -from lldbsuite.test.decorators import * -from lldbsuite.test.gdbclientutils import * +from lldbsuite.test.lldbtest import lldbutil +from lldbsuite.test.decorators import ( + 
skipIfXmlSupportMissing, + skipIfRemote, + skipIfLLVMTargetMissing, +) +from lldbsuite.test.gdbclientutils import MockGDBServerResponder from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase @@ -22,12 +26,13 @@ def __init__(self, doc, endian): self.target_xml = doc self.endian = endian - def qXferRead(self, obj, annex, offset, length): + def qXferRead(self, obj: str, annex:str , offset: int, + length: int) -> tuple[str | None, bool]: if annex == "target.xml": return self.target_xml, False - return (None,) + return (None, False) - def readRegister(self, regnum): + def readRegister(self, register: int): return "E01" def readRegisters(self): @@ -51,6 +56,7 @@ def do_endian_test(self, endian): ), }[endian] + assert self.server is not None self.server.responder = Responder( dedent( f"""\ diff --git a/lldb/test/API/commands/expression/expr-vec-elt-order/TestExprVectorElementOrder.py b/lldb/test/API/commands/expression/expr-vec-elt-order/TestExprVectorElementOrder.py new file mode 100644 index 0000000000000..623056356ae52 --- /dev/null +++ b/lldb/test/API/commands/expression/expr-vec-elt-order/TestExprVectorElementOrder.py @@ -0,0 +1,313 @@ +""" Check that registers written to memory for expression evaluation are + written using the target's endian not the host's. +""" + +from dataclasses import dataclass +from enum import Enum +from textwrap import dedent +import lldb +from lldbsuite.test.lldbtest import lldbutil +from lldbsuite.test.decorators import ( + skipIfXmlSupportMissing, + skipIfRemote, + skipIfLLVMTargetMissing, +) +from lldbsuite.test.gdbclientutils import MockGDBServerResponder +from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase + + +class Endian(Enum): + BIG = 0 + LITTLE = 1 + + +class ElementOrder(Enum): + # Memory is laid out as [0, 1, ... 
N-1] + ZEROFIRST = 0 + # Memory is laid out as [N-1, N-2, ..., 0] + LASTFIRST = 1 + + +@dataclass +class Config: + architecture: str + pc_reg_name: str + yaml_file: str + data: str + machine: str + endian: Endian + element_order: ElementOrder + + +class Responder(MockGDBServerResponder): + def __init__(self, doc: str, endian: Endian, element_order: ElementOrder): + super().__init__() + self.target_xml = doc + self.endian = endian + self.element_order = element_order + + def qXferRead(self, obj, annex, offset, length) -> tuple[str | None, bool]: + if obj == 'features' and annex == "target.xml": + more = offset + length < len(self.target_xml) + return self.target_xml[offset:offset+length], more + return (None, False) + + def readRegister(self, register: int) -> str: + _ = register # Silence unused parameter hint + return "E01" + + def readRegisters(self) -> str: + # 64 bit pc value. + data = ["00", "00", "00", "00", "00", "00", "12", "34"] + if self.endian == Endian.LITTLE: + data.reverse() + return "".join(data) + + + +class TestXMLRegisterFlags(GDBRemoteTestBase): + def do_expr_eval(self, config_name: str): + cfg = { + # AArch64 stores elements in little-endian, zero-first order. + "aarch64-le": Config( + architecture="aarch64", + pc_reg_name="pc", + yaml_file="aarch64.yaml", + data="ELFDATA2LSB", + machine="EM_AARCH64", + endian=Endian.LITTLE, + element_order=ElementOrder.ZEROFIRST, + ), + # AArch64 stores elements in big-endian but the vector remains in + # the same zero-first order. 
+ "aarch64-be": Config( + architecture="aarch64_be", + pc_reg_name="pc", + yaml_file="aarch64be.yaml", + data="ELFDATA2MSB", + machine="EM_AARCH64", + endian=Endian.BIG, + element_order=ElementOrder.ZEROFIRST, + ), + # PowerPC stores the whole vector as little-endian + "ppc-le": Config( + architecture="ppc", + pc_reg_name="pc", + yaml_file="ppc.yaml", + data="ELFDATA2LSB", + machine="EM_PPC64", + endian=Endian.LITTLE, + element_order=ElementOrder.ZEROFIRST, + ), + # PowerPC stores the whole vector as big-endian which reverses + # element order + "ppc-be": Config( + architecture="ppc", + pc_reg_name="pc", + yaml_file="ppcbe.yaml", + data="ELFDATA2MSB", + machine="EM_PPC64", + endian=Endian.BIG, + element_order=ElementOrder.LASTFIRST, + ), + # Vectors are stored in the same element order as arrays. + # Note: WebAssembly loads vectors in reverse order because it + # requires that vectors behave like little-endian first-zero + # even when reinterpreting memory as a vector with different + # element sizes. + # C++ on the other hand effectively shuffles the bytes like + # AArch64 Big Endian does + "systemz-be": Config( + architecture="s390x", + pc_reg_name="pswa", + yaml_file="s390x.yaml", + data="ELFDATA2MSB", + machine="EM_S390", + endian=Endian.BIG, + element_order=ElementOrder.ZEROFIRST, + ), + }[config_name] + + assert self.server is not None + self.server.responder = Responder( + dedent( + f"""\ + + + {cfg.architecture} + + + + """ + ), + cfg.endian, + cfg.element_order, + ) + + # We need to have a program file, so that we have a full type system, + # so that we can do the casts later. 
+ obj_path = self.getBuildArtifact("main.o") + yaml_path = self.getBuildArtifact(cfg.yaml_file) + with open(yaml_path, "w") as f: + f.write( + dedent( + f"""\ + --- !ELF + FileHeader: + Class: ELFCLASS64 + Data: {cfg.data} + Type: ET_REL + Machine: {cfg.machine} + DWARF: + debug_abbrev: + - Table: + - Code: 1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Code: 2 + Tag: DW_TAG_typedef + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_name + Form: DW_FORM_string + - Code: 3 + Tag: DW_TAG_array_type + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_GNU_vector + Form: DW_FORM_flag_present + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Code: 4 + Tag: DW_TAG_subrange_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_count + Form: DW_FORM_data1 + - Code: 5 + Tag: DW_TAG_base_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_string + - Attribute: DW_AT_encoding + Form: DW_FORM_data1 + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + debug_info: + - Version: 4 + AbbrevTableID: 0 + AbbrOffset: 0x0 + AddrSize: 8 + Entries: + - AbbrCode: 1 + Values: + - Value: 0x0004 # DW_LANG_C_plus_plus + - AbbrCode: 2 # typedef + Values: + - Value: 27 # DW_AT_type: Reference to array type at 0x1b + - CStr: v4float # DW_AT_name + - AbbrCode: 3 # array_type + Values: + - Value: 0x01 # DW_AT_GNU_vector (flag_present) + - Value: 35 # DW_AT_type: Reference to float at 0x23 + - AbbrCode: 4 # subrange_type + Values: + - Value: 0x04 # DW_AT_count: 4 elements + - AbbrCode: 0 # End of array_type children + - AbbrCode: 5 # base_type (float) + Values: + - CStr: float # DW_AT_name + - Value: 0x04 # DW_AT_encoding: DW_ATE_float + - Value: 0x04 # DW_AT_byte_size: 4 bytes + - AbbrCode: 0 # End of compile unit + ... 
+ """ + ) + ) + self.yaml2obj(yaml_path, obj_path) + target = self.dbg.CreateTarget(obj_path) + + process = self.connect(target) + lldbutil.expect_state_changes( + self, self.dbg.GetListener(), process, [lldb.eStateStopped] + ) + + # Enable logging to debug type lookup and expression evaluation + self.TraceOn() + log_file = self.getBuildArtifact("lldb.log") + self.runCmd(f"log enable lldb types expr -f {log_file}") + self.runCmd("image dump symtab", check=False) + self.runCmd("image lookup -t v4float", check=False) + self.runCmd("image lookup -t float", check=False) + + # If expressions convert register values into target endian, the + # vector should be stored correctly in memory. + self.expect("expr --language c++ -- (v4float){0.25, 0.5, 0.75, 1.0}", substrs=["0.25", "0.5", "0.75", "1"]) + + # Check the raw bytes to verify endianness + result = self.frame().EvaluateExpression("(v4float){0.25, 0.5, 0.75, 1.0}", lldb.eDynamicCanRunTarget) + self.assertTrue(result.IsValid()) + error = lldb.SBError() + data = result.GetData() + bytes_list = [data.GetUnsignedInt8(error, i) for i in range(16)] + # For big-endian: 0x3e800000, 0x3f000000, 0x3f400000, 0x3f800000 + # For little-endian: bytes are reversed within each float + expected_big = [0x3e, 0x80, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3f, 0x40, 0x00, 0x00, 0x3f, 0x80, 0x00, 0x00] + expected_little = [0x00, 0x00, 0x80, 0x3e, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x40, 0x3f, 0x00, 0x00, 0x80, 0x3f] + if cfg.endian == Endian.BIG: + self.assertEqual(bytes_list, expected_big) + else: + self.assertEqual(bytes_list, expected_little) + + pc = ( + process.thread[0] + .frame[0] + .GetRegisters() + .GetValueAtIndex(0) + .GetChildMemberWithName("pc") + ) + ull = target.FindTypes("unsigned long long").GetTypeAtIndex(0) + pc_ull = pc.Cast(ull) + + self.assertEqual(pc.GetValue(), pc_ull.GetValue()) + self.assertEqual(pc.GetValueAsAddress(), pc_ull.GetValueAsAddress()) + self.assertEqual(pc.GetValueAsSigned(), pc_ull.GetValueAsSigned()) 
+ self.assertEqual(pc.GetValueAsUnsigned(), pc_ull.GetValueAsUnsigned()) + + @skipIfXmlSupportMissing + @skipIfRemote + @skipIfLLVMTargetMissing("AArch64") + def test_aarch64_little_endian_target(self): + self.do_expr_eval("aarch64-le") + + # AArch64 doesn't seem to have implemented big-endian in lldb + # Both big-endian and little-endian triples select the same ArchSpec. + #@skipIfXmlSupportMissing + #@skipIfRemote + #@skipIfLLVMTargetMissing("AArch64") + #def test_aarch64_big_endian(self): + # self.do_expr_eval("aarch64-be") + + @skipIfXmlSupportMissing + @skipIfRemote + @skipIfLLVMTargetMissing("PowerPC") + def test_ppc_little_endian(self): + self.do_expr_eval("ppc-le") + + @skipIfXmlSupportMissing + @skipIfRemote + @skipIfLLVMTargetMissing("PowerPC") + def test_ppc_big_endian_target(self): + self.do_expr_eval("ppc-be") + + @skipIfXmlSupportMissing + @skipIfRemote + @skipIfLLVMTargetMissing("SystemZ") + def test_systemz_big_endian_target(self): + self.do_expr_eval("systemz-be") diff --git a/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py b/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py index 1839c28aeb29f..e5e0062721956 100644 --- a/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py +++ b/lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py @@ -15,10 +15,7 @@ def setUp(self): # Find the line number to break at. self.line = line_number("main.cpp", "// break here") - # rdar://problem/14035604 - @skipIf(compiler="gcc") # gcc don't have ext_vector_type extension - def test_with_run_command(self): - """Check that vector types format properly""" + def setup_and_run_to_breakpoint(self): self.build() self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) @@ -43,8 +40,7 @@ def cleanup(): # Execute the cleanup function during test case tear down. 
self.addTearDownHook(cleanup) - pass # my code never fails - + def vector_formatting_test(self, allow_jit=True): v = self.frame().FindVariable("v") v.SetPreferSyntheticValue(True) v.SetFormat(lldb.eFormatVectorOfFloat32) @@ -66,15 +62,30 @@ def cleanup(): v.GetChildAtIndex(3).GetData().float[0], 2.50, "child 3 == 2.50" ) + jit_flag = "" if allow_jit else " --allow-jit false" self.expect( - "expr -f int16_t[] -- v", + f"expr{jit_flag} -f int16_t[] -- v", substrs=["(0, 16288, 0, 16288, 0, 16416, 0, 16416)"], ) self.expect( - "expr -f uint128_t[] -- v", + f"expr{jit_flag} -f uint128_t[] -- v", substrs=["(85236745249553456609335044694184296448)"], ) - self.expect("expr -f float32[] -- v", substrs=["(1.25, 1.25, 2.5, 2.5)"]) + self.expect( + f"expr{jit_flag} -f float32[] -- v", substrs=["(1.25, 1.25, 2.5, 2.5)"] + ) + + self.expect(f"expr{jit_flag} -- f4", substrs=["(1.25, 1.25, 2.5, 2.5)"]) + + self.expect(f"expr{jit_flag} -- float4(0)", substrs=["(0, 0, 0, 0)"]) + self.expect(f"expr{jit_flag} -- float4(1)", substrs=["(1, 1, 1, 1)"]) + self.expect( + f"expr{jit_flag} -- float4{{1.25, 2.5, 3.25, 4.5}}", + substrs=["(1.25, 2.5, 3.25, 4.5)"], + ) + self.expect( + f"expr{jit_flag} -- float4{{0.1, 0.2, 0.3, 0.4}}[0]", substrs=["0.1"] + ) oldValue = v.GetChildAtIndex(0).GetValue() v.SetFormat(lldb.eFormatHex) @@ -93,3 +104,15 @@ def cleanup(): self.assertEqual(f3.GetChildAtIndex(0).GetData().float[0], 1.25) self.assertEqual(f3.GetChildAtIndex(1).GetData().float[0], 2.50) self.assertEqual(f3.GetChildAtIndex(2).GetData().float[0], 2.50) + + # rdar://problem/14035604 + @skipIf(compiler="gcc") # gcc don't have ext_vector_type extension + def test_with_run_command(self): + self.setup_and_run_to_breakpoint() + self.runCmd("settings set plugin.jit-loader.gdb.enable on") + self.vector_formatting_test() + + @skipIf(compiler="gcc") # gcc don't have ext_vector_type extension + def test_with_run_command_no_jit(self): + self.setup_and_run_to_breakpoint() + 
self.vector_formatting_test(allow_jit=False)