- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
[lldb] Add some vector operations to the IRInterpreter #155000
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
| @llvm/pr-subscribers-lldb Author: Daniel Sanders (dsandersllvm) Changes: This allows the debugger to evaluate expressions without requiring the […] As far as I know most targets have a vector memory layout that matches the […] I've attempted to implement the correct element ordering on the relevant […] Patch is 30.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155000.diff 7 Files Affected: 
 diff --git a/lldb/include/lldb/Core/Architecture.h b/lldb/include/lldb/Core/Architecture.h
index b6fc1a20e1e69..435fe20121869 100644
--- a/lldb/include/lldb/Core/Architecture.h
+++ b/lldb/include/lldb/Core/Architecture.h
@@ -129,6 +129,17 @@ class Architecture : public PluginInterface {
                                        RegisterContext &reg_context) const {
     return false;
   }
+
+  // Get the vector element order for this architecture. This determines how
+  // vector elements are indexed. This matters in a few places such as reading/
+  // writing LLVM-IR values to/from target memory. Some architectures use
+  // little-endian element ordering where element 0 is at the lowest address
+  // even when the architecture is otherwise big-endian (e.g. MIPS MSA, ARM
+  // NEON), but some architectures like PowerPC may use big-endian element
+  // ordering where element 0 is at the highest address.
+  virtual lldb::ByteOrder GetVectorElementOrder() const {
+    return lldb::eByteOrderLittle;
+  }
 };
 
 } // namespace lldb_private
diff --git a/lldb/include/lldb/Expression/IRInterpreter.h b/lldb/include/lldb/Expression/IRInterpreter.h
index 9106f1b4d1c3d..1c0f10aabed21 100644
--- a/lldb/include/lldb/Expression/IRInterpreter.h
+++ b/lldb/include/lldb/Expression/IRInterpreter.h
@@ -37,7 +37,8 @@ class IRInterpreter {
 public:
   static bool CanInterpret(llvm::Module &module, llvm::Function &function,
                            lldb_private::Status &error,
-                           const bool support_function_calls);
+                           const bool support_function_calls,
+                           lldb_private::ExecutionContext &exe_ctx);
 
   static bool Interpret(llvm::Module &module, llvm::Function &function,
                         llvm::ArrayRef<lldb::addr_t> args,
diff --git a/lldb/source/Expression/IRInterpreter.cpp b/lldb/source/Expression/IRInterpreter.cpp
index 91404831aeb9b..a01a3e989398d 100644
--- a/lldb/source/Expression/IRInterpreter.cpp
+++ b/lldb/source/Expression/IRInterpreter.cpp
@@ -70,6 +70,17 @@ static std::string PrintType(const Type *type, bool truncate = false) {
   return s;
 }
 
+static bool MemoryMatchesIRElementOrder(lldb_private::ExecutionContext &exe_ctx) {
+  lldb::TargetSP target_sp = exe_ctx.GetTargetSP();
+  if (target_sp) {
+    const auto *arch_plugin = target_sp->GetArchitecturePlugin();
+    if (arch_plugin) {
+      return arch_plugin->GetVectorElementOrder() == lldb::eByteOrderLittle;
+    }
+  }
+  return true; // Default to little-endian (matches IR)
+}
+
 static bool CanIgnoreCall(const CallInst *call) {
   const llvm::Function *called_function = call->getCalledFunction();
 
@@ -162,7 +173,7 @@ class InterpreterStackFrame {
   }
 
   bool EvaluateValue(lldb_private::Scalar &scalar, const Value *value,
-                     Module &module) {
+                     Module &module, lldb_private::ExecutionContext &exe_ctx) {
     const Constant *constant = dyn_cast<Constant>(value);
 
     if (constant) {
@@ -186,7 +197,7 @@ class InterpreterStackFrame {
       return AssignToMatchType(scalar, value_apint, value->getType());
     }
 
-    lldb::addr_t process_address = ResolveValue(value, module);
+    lldb::addr_t process_address = ResolveValue(value, module, exe_ctx);
     size_t value_size = m_target_data.getTypeStoreSize(value->getType());
 
     lldb_private::DataExtractor value_extractor;
@@ -218,8 +229,8 @@ class InterpreterStackFrame {
   }
 
   bool AssignValue(const Value *value, lldb_private::Scalar scalar,
-                   Module &module) {
-    lldb::addr_t process_address = ResolveValue(value, module);
+                   Module &module, lldb_private::ExecutionContext &exe_ctx) {
+    lldb::addr_t process_address = ResolveValue(value, module, exe_ctx);
 
     if (process_address == LLDB_INVALID_ADDRESS)
       return false;
@@ -367,7 +378,68 @@ class InterpreterStackFrame {
     return true;
   }
 
-  bool ResolveConstant(lldb::addr_t process_address, const Constant *constant) {
+  bool ResolveVectorConstant(lldb::addr_t process_address,
+                             const Constant *constant,
+                             lldb_private::ExecutionContext &exe_ctx) {
+    auto *vector_type = dyn_cast<FixedVectorType>(constant->getType());
+    if (!vector_type)
+      return false;
+
+    Type *element_type = vector_type->getElementType();
+    unsigned num_elements = vector_type->getNumElements();
+    size_t element_size = m_target_data.getTypeStoreSize(element_type);
+    size_t total_size = element_size * num_elements;
+    bool reverse_elements = !MemoryMatchesIRElementOrder(exe_ctx);
+
+    lldb_private::DataBufferHeap buf(total_size, 0);
+    uint8_t *data_ptr = buf.GetBytes();
+
+    if (isa<ConstantAggregateZero>(constant)) {
+      // Zero initializer - buffer is already zeroed, just write it
+      lldb_private::Status write_error;
+      m_execution_unit.WriteMemory(process_address, buf.GetBytes(),
+                                   buf.GetByteSize(), write_error);
+      return write_error.Success();
+    }
+
+    if (const ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(constant)) {
+      for (unsigned i = 0; i < num_elements; ++i) {
+        const Constant *element = cdv->getElementAsConstant(i);
+        APInt element_value;
+        if (!ResolveConstantValue(element_value, element))
+          return false;
+
+        // Calculate target offset based on element ordering
+        unsigned target_index =
+            !reverse_elements ? i : (num_elements - 1 - i);
+        size_t offset = target_index * element_size;
+
+        lldb_private::Scalar element_scalar(
+            element_value.zextOrTrunc(element_size * 8));
+        lldb_private::Status get_data_error;
+        if (!element_scalar.GetAsMemoryData(data_ptr + offset,
+                                          element_size, m_byte_order,
+                                          get_data_error))
+          return false;
+      }
+      lldb_private::Status write_error;
+      m_execution_unit.WriteMemory(process_address, buf.GetBytes(),
+                                   buf.GetByteSize(), write_error);
+
+      return write_error.Success();
+    }
+
+    return false;
+  }
+
+  bool ResolveConstant(lldb::addr_t process_address, const Constant *constant,
+                       lldb_private::ExecutionContext &exe_ctx) {
+    // Handle vector constants specially since they can't be represented as a
+    // single APInt
+    if (constant->getType()->isVectorTy()) {
+      return ResolveVectorConstant(process_address, constant, exe_ctx);
+    }
+
     APInt resolved_value;
 
     if (!ResolveConstantValue(resolved_value, constant))
@@ -436,7 +508,8 @@ class InterpreterStackFrame {
     return std::string(ss.GetString());
   }
 
-  lldb::addr_t ResolveValue(const Value *value, Module &module) {
+  lldb::addr_t ResolveValue(const Value *value, Module &module,
+                            lldb_private::ExecutionContext &exe_ctx) {
     ValueMap::iterator i = m_values.find(value);
 
     if (i != m_values.end())
@@ -447,7 +520,7 @@ class InterpreterStackFrame {
     lldb::addr_t data_address = Malloc(value->getType());
 
     if (const Constant *constant = dyn_cast<Constant>(value)) {
-      if (!ResolveConstant(data_address, constant)) {
+      if (!ResolveConstant(data_address, constant, exe_ctx)) {
         lldb_private::Status free_error;
         m_execution_unit.Free(data_address, free_error);
         return LLDB_INVALID_ADDRESS;
@@ -484,8 +557,13 @@ static bool CanResolveConstant(llvm::Constant *constant) {
     return false;
   case Value::ConstantIntVal:
   case Value::ConstantFPVal:
+    return true;
+  case Value::ConstantDataVectorVal:
   case Value::FunctionVal:
     return true;
+  case Value::ConstantAggregateZeroVal:
+    // Zero initializers can be resolved
+    return true;
   case Value::ConstantExprVal:
     if (const ConstantExpr *constant_expr = dyn_cast<ConstantExpr>(constant)) {
       switch (constant_expr->getOpcode()) {
@@ -522,7 +600,8 @@ static bool CanResolveConstant(llvm::Constant *constant) {
 
 bool IRInterpreter::CanInterpret(llvm::Module &module, llvm::Function &function,
                                  lldb_private::Status &error,
-                                 const bool support_function_calls) {
+                                 const bool support_function_calls,
+                                 lldb_private::ExecutionContext &exe_ctx) {
   lldb_private::Log *log(GetLog(LLDBLog::Expressions));
 
   bool saw_function_with_body = false;
@@ -551,6 +630,7 @@ bool IRInterpreter::CanInterpret(llvm::Module &module, llvm::Function &function,
       case Instruction::BitCast:
       case Instruction::Br:
       case Instruction::PHI:
+      case Instruction::ExtractElement:
         break;
       case Instruction::Call: {
         CallInst *call_inst = dyn_cast<CallInst>(&ii);
@@ -644,7 +724,24 @@ bool IRInterpreter::CanInterpret(llvm::Module &module, llvm::Function &function,
         switch (operand_type->getTypeID()) {
         default:
           break;
-        case Type::FixedVectorTyID:
+        case Type::FixedVectorTyID: {
+          // If the element order is big-endian (highest index first) then it
+          // doesn't match LLVM-IR and must be transformed to correctly transfer
+          // between LLVM-IR and memory. This might not be fully implemented so
+          // decline to interpret this case.
+          if (exe_ctx.GetTargetPtr()) {
+            const auto *arch_plugin =
+                exe_ctx.GetTargetRef().GetArchitecturePlugin();
+            if (arch_plugin &&
+                arch_plugin->GetVectorElementOrder() == lldb::eByteOrderBig) {
+              LLDB_LOGF(log, "Unsupported big-endian vector element ordering");
+              error = lldb_private::Status::FromErrorString(
+                  "IR interpreter doesn't support big-endian vector element ordering");
+              return false;
+            }
+          }
+          break;
+        }
         case Type::ScalableVectorTyID: {
           LLDB_LOGF(log, "Unsupported operand type: %s",
                     PrintType(operand_type).c_str());
@@ -657,8 +754,9 @@ bool IRInterpreter::CanInterpret(llvm::Module &module, llvm::Function &function,
         // The IR interpreter currently doesn't know about
         // 128-bit integers. As they're not that frequent,
         // we can just fall back to the JIT rather than
-        // choking.
-        if (operand_type->getPrimitiveSizeInBits() > 64) {
+        // choking. However, allow vectors since we handle them above.
+        if (operand_type->getPrimitiveSizeInBits() > 64 &&
+            !operand_type->isVectorTy()) {
           LLDB_LOGF(log, "Unsupported operand type: %s",
                     PrintType(operand_type).c_str());
           error =
@@ -799,13 +897,13 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
       lldb_private::Scalar L;
       lldb_private::Scalar R;
 
-      if (!frame.EvaluateValue(L, lhs, module)) {
+      if (!frame.EvaluateValue(L, lhs, module, exe_ctx)) {
         LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(lhs).c_str());
         error = lldb_private::Status::FromErrorString(bad_value_error);
         return false;
       }
 
-      if (!frame.EvaluateValue(R, rhs, module)) {
+      if (!frame.EvaluateValue(R, rhs, module, exe_ctx)) {
         LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(rhs).c_str());
         error = lldb_private::Status::FromErrorString(bad_value_error);
         return false;
@@ -872,7 +970,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
         break;
       }
 
-      frame.AssignValue(inst, result, module);
+      frame.AssignValue(inst, result, module, exe_ctx);
 
       if (log) {
         LLDB_LOGF(log, "Interpreted a %s", inst->getOpcodeName());
@@ -947,13 +1045,13 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
       lldb_private::Scalar S;
 
-      if (!frame.EvaluateValue(S, source, module)) {
+      if (!frame.EvaluateValue(S, source, module, exe_ctx)) {
         LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(source).c_str());
         error = lldb_private::Status::FromErrorString(bad_value_error);
         return false;
       }
 
-      frame.AssignValue(inst, S, module);
+      frame.AssignValue(inst, S, module, exe_ctx);
     } break;
     case Instruction::SExt: {
       const CastInst *cast_inst = cast<CastInst>(inst);
@@ -962,7 +1060,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
       lldb_private::Scalar S;
 
-      if (!frame.EvaluateValue(S, source, module)) {
+      if (!frame.EvaluateValue(S, source, module, exe_ctx)) {
         LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(source).c_str());
         error = lldb_private::Status::FromErrorString(bad_value_error);
         return false;
@@ -972,7 +1070,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
       lldb_private::Scalar S_signextend(S.SLongLong());
 
-      frame.AssignValue(inst, S_signextend, module);
+      frame.AssignValue(inst, S_signextend, module, exe_ctx);
     } break;
     case Instruction::Br: {
       const BranchInst *br_inst = cast<BranchInst>(inst);
@@ -982,7 +1080,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
         lldb_private::Scalar C;
 
-        if (!frame.EvaluateValue(C, condition, module)) {
+        if (!frame.EvaluateValue(C, condition, module, exe_ctx)) {
           LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(condition).c_str());
           error = lldb_private::Status::FromErrorString(bad_value_error);
           return false;
@@ -1020,12 +1118,12 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
       Value *value = phi_inst->getIncomingValueForBlock(frame.m_prev_bb);
       lldb_private::Scalar result;
-      if (!frame.EvaluateValue(result, value, module)) {
+      if (!frame.EvaluateValue(result, value, module, exe_ctx)) {
         LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(value).c_str());
         error = lldb_private::Status::FromErrorString(bad_value_error);
         return false;
       }
-      frame.AssignValue(inst, result, module);
+      frame.AssignValue(inst, result, module, exe_ctx);
 
       if (log) {
         LLDB_LOGF(log, "Interpreted a %s", inst->getOpcodeName());
@@ -1041,7 +1139,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
       lldb_private::Scalar P;
 
-      if (!frame.EvaluateValue(P, pointer_operand, module)) {
+      if (!frame.EvaluateValue(P, pointer_operand, module, exe_ctx)) {
         LLDB_LOGF(log, "Couldn't evaluate %s",
                   PrintValue(pointer_operand).c_str());
         error = lldb_private::Status::FromErrorString(bad_value_error);
@@ -1063,7 +1161,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
         if (!constant_index) {
           lldb_private::Scalar I;
 
-          if (!frame.EvaluateValue(I, *ii, module)) {
+          if (!frame.EvaluateValue(I, *ii, module, exe_ctx)) {
             LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(*ii).c_str());
             error = lldb_private::Status::FromErrorString(bad_value_error);
             return false;
@@ -1084,7 +1182,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
       lldb_private::Scalar Poffset = P + offset;
 
-      frame.AssignValue(inst, Poffset, module);
+      frame.AssignValue(inst, Poffset, module, exe_ctx);
 
       if (log) {
         LLDB_LOGF(log, "Interpreted a GetElementPtrInst");
@@ -1105,13 +1203,13 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
       lldb_private::Scalar L;
       lldb_private::Scalar R;
 
-      if (!frame.EvaluateValue(L, lhs, module)) {
+      if (!frame.EvaluateValue(L, lhs, module, exe_ctx)) {
         LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(lhs).c_str());
         error = lldb_private::Status::FromErrorString(bad_value_error);
         return false;
       }
 
-      if (!frame.EvaluateValue(R, rhs, module)) {
+      if (!frame.EvaluateValue(R, rhs, module, exe_ctx)) {
         LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(rhs).c_str());
         error = lldb_private::Status::FromErrorString(bad_value_error);
         return false;
@@ -1184,7 +1282,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
         break;
       }
 
-      frame.AssignValue(inst, result, module);
+      frame.AssignValue(inst, result, module, exe_ctx);
 
       if (log) {
         LLDB_LOGF(log, "Interpreted an ICmpInst");
@@ -1200,13 +1298,13 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
       lldb_private::Scalar I;
 
-      if (!frame.EvaluateValue(I, src_operand, module)) {
+      if (!frame.EvaluateValue(I, src_operand, module, exe_ctx)) {
         LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(src_operand).c_str());
         error = lldb_private::Status::FromErrorString(bad_value_error);
         return false;
       }
 
-      frame.AssignValue(inst, I, module);
+      frame.AssignValue(inst, I, module, exe_ctx);
 
       if (log) {
         LLDB_LOGF(log, "Interpreted an IntToPtr");
@@ -1221,13 +1319,13 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
       lldb_private::Scalar I;
 
-      if (!frame.EvaluateValue(I, src_operand, module)) {
+      if (!frame.EvaluateValue(I, src_operand, module, exe_ctx)) {
         LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(src_operand).c_str());
         error = lldb_private::Status::FromErrorString(bad_value_error);
         return false;
       }
 
-      frame.AssignValue(inst, I, module);
+      frame.AssignValue(inst, I, module, exe_ctx);
 
       if (log) {
         LLDB_LOGF(log, "Interpreted a PtrToInt");
@@ -1242,13 +1340,13 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
       lldb_private::Scalar I;
 
-      if (!frame.EvaluateValue(I, src_operand, module)) {
+      if (!frame.EvaluateValue(I, src_operand, module, exe_ctx)) {
         LLDB_LOGF(log, "Couldn't evaluate %s", PrintValue(src_operand).c_str());
         error = lldb_private::Status::FromErrorString(bad_value_error);
         return false;
       }
 
-      frame.AssignValue(inst, I, module);
+      frame.AssignValue(inst, I, module, exe_ctx);
 
       if (log) {
         LLDB_LOGF(log, "Interpreted a Trunc");
@@ -1267,8 +1365,8 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
       const Value *pointer_operand = load_inst->getPointerOperand();
 
-      lldb::addr_t D = frame.ResolveValue(load_inst, module);
-      lldb::addr_t P = frame.ResolveValue(pointer_operand, module);
+      lldb::addr_t D = frame.ResolveValue(load_inst, module, exe_ctx);
+      lldb::addr_t P = frame.ResolveValue(pointer_operand, module, exe_ctx);
 
       if (D == LLDB_INVALID_ADDRESS) {
         LLDB_LOGF(log, "LoadInst's value doesn't resolve to anything");
@@ -1336,8 +1434,8 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
       const Value *value_operand = store_inst->getValueOperand();
       const Value *pointer_operand = store_inst->getPointerOperand();
 
-      lldb::addr_t D = frame.ResolveValue(value_operand, module);
-      lldb::addr_t P = frame.ResolveValue(pointer_operand, module);
+      lldb::addr_t D = frame.ResolveValue(value_operand, module, exe_ctx);
+      lldb::addr_t P = frame.ResolveValue(pointer_operand, module, exe_ctx);
 
       if (D == LLDB_INVALID_ADDRESS) {
         LLDB_LOGF(log, "StoreInst's value doesn't resolve to anything");
@@ -1430,7 +1528,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
       lldb_private::Scalar I;
       const llvm::Value *val = call_inst->getCalledOperand();
 
-      if (!frame.EvaluateValue(I, val, module)) {
+      if (!frame.EvaluateValue(I, val, module, exe_ctx)) {
         error = lldb_private::Status::FromErrorString(
             "unable to get address of function");
         return false;
@@ -1469,7 +1567,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
 
         // Extract the arguments value
    ...
[truncated]
 | 
| You can test this locally with the following command: git-clang-format --diff origin/main HEAD --extensions cpp,h -- lldb/include/lldb/Core/Architecture.h lldb/include/lldb/Expression/IRInterpreter.h lldb/source/Expression/IRInterpreter.cpp lldb/source/Expression/Materializer.cpp lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.cpp lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.h lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
 View the diff from clang-format here.
diff --git a/lldb/source/Expression/IRInterpreter.cpp b/lldb/source/Expression/IRInterpreter.cpp
index 86c5ce3c6..3ee09ed3b 100644
--- a/lldb/source/Expression/IRInterpreter.cpp
+++ b/lldb/source/Expression/IRInterpreter.cpp
@@ -70,9 +70,8 @@ static std::string PrintType(const Type *type, bool truncate = false) {
   return s;
 }
 
-static bool
-isNonTrivialBitcast(const Instruction &inst,
-                    lldb_private::ExecutionContext &exe_ctx) {
+static bool isNonTrivialBitcast(const Instruction &inst,
+                                lldb_private::ExecutionContext &exe_ctx) {
   auto *result_type = dyn_cast<VectorType>(inst.getType());
   auto *operand = inst.getOperand(0);
   auto *operand_type = dyn_cast<VectorType>(operand->getType());
@@ -1725,8 +1724,7 @@ bool IRInterpreter::InterpretExtractElement(
   const Value *index_operand = extract_inst->getIndexOperand();
 
   // Get the vector address
-  lldb::addr_t vector_addr =
-      frame.ResolveValue(vector_operand, module);
+  lldb::addr_t vector_addr = frame.ResolveValue(vector_operand, module);
 
   if (vector_addr == LLDB_INVALID_ADDRESS) {
     LLDB_LOGF(log, "ExtractElement's vector doesn't resolve to anything");
 | 
| You can test this locally with the following command: darker --check --diff -r origin/main...HEAD lldb/test/API/commands/expression/expr-vec-elt-order/TestExprVectorElementOrder.py lldb/packages/Python/lldbsuite/test/gdbclientutils.py lldb/test/API/commands/expression/TestRegisterExpressionEndian.py lldb/test/API/functionalities/data-formatter/vector-types/TestVectorTypesFormatting.py
 View the diff from darker here.
--- packages/Python/lldbsuite/test/gdbclientutils.py	2025-10-06 22:17:07.000000 +0000
+++ packages/Python/lldbsuite/test/gdbclientutils.py	2025-10-06 23:02:14.165530 +0000
@@ -307,12 +307,13 @@
 
     def haltReason(self):
         # SIGINT is 2, return type is 2 digit hex string
         return "S02"
 
-    def qXferRead(self, obj: str, annex: str, offset: int,
-                  length: int) -> tuple[str | None, bool]:
+    def qXferRead(
+        self, obj: str, annex: str, offset: int, length: int
+    ) -> tuple[str | None, bool]:
         return None, False
 
     def _qXferResponse(self, data, has_more):
         return "%s%s" % ("m" if has_more else "l", escape_binary(data))
 
--- test/API/commands/expression/TestRegisterExpressionEndian.py	2025-10-06 22:17:07.000000 +0000
+++ test/API/commands/expression/TestRegisterExpressionEndian.py	2025-10-06 23:02:14.238921 +0000
@@ -24,12 +24,13 @@
     def __init__(self, doc, endian):
         super().__init__()
         self.target_xml = doc
         self.endian = endian
 
-    def qXferRead(self, obj: str, annex:str , offset: int,
-                  length: int) -> tuple[str | None, bool]:
+    def qXferRead(
+        self, obj: str, annex: str, offset: int, length: int
+    ) -> tuple[str | None, bool]:
         if annex == "target.xml":
             return self.target_xml, False
         return (None, False)
 
     def readRegister(self, register: int):
--- test/API/commands/expression/expr-vec-elt-order/TestExprVectorElementOrder.py	2025-10-06 22:17:07.000000 +0000
+++ test/API/commands/expression/expr-vec-elt-order/TestExprVectorElementOrder.py	2025-10-06 23:02:14.353317 +0000
@@ -45,26 +45,25 @@
         self.target_xml = doc
         self.endian = endian
         self.element_order = element_order
 
     def qXferRead(self, obj, annex, offset, length) -> tuple[str | None, bool]:
-        if obj == 'features' and annex == "target.xml":
+        if obj == "features" and annex == "target.xml":
             more = offset + length < len(self.target_xml)
-            return self.target_xml[offset:offset+length], more
+            return self.target_xml[offset : offset + length], more
         return (None, False)
 
     def readRegister(self, register: int) -> str:
-        _ = register # Silence unused parameter hint
+        _ = register  # Silence unused parameter hint
         return "E01"
 
     def readRegisters(self) -> str:
         # 64 bit pc value.
         data = ["00", "00", "00", "00", "00", "00", "12", "34"]
         if self.endian == Endian.LITTLE:
             data.reverse()
         return "".join(data)
-
 
 
 class TestXMLRegisterFlags(GDBRemoteTestBase):
     def do_expr_eval(self, config_name: str):
         cfg = {
@@ -246,22 +245,61 @@
         self.runCmd("image lookup -t v4float", check=False)
         self.runCmd("image lookup -t float", check=False)
 
         # If expressions convert register values into target endian, the
         # vector should be stored correctly in memory.
-        self.expect("expr --language c++ -- (v4float){0.25, 0.5, 0.75, 1.0}", substrs=["0.25", "0.5", "0.75", "1"])
+        self.expect(
+            "expr --language c++ -- (v4float){0.25, 0.5, 0.75, 1.0}",
+            substrs=["0.25", "0.5", "0.75", "1"],
+        )
 
         # Check the raw bytes to verify endianness
-        result = self.frame().EvaluateExpression("(v4float){0.25, 0.5, 0.75, 1.0}", lldb.eDynamicCanRunTarget)
+        result = self.frame().EvaluateExpression(
+            "(v4float){0.25, 0.5, 0.75, 1.0}", lldb.eDynamicCanRunTarget
+        )
         self.assertTrue(result.IsValid())
         error = lldb.SBError()
         data = result.GetData()
         bytes_list = [data.GetUnsignedInt8(error, i) for i in range(16)]
         # For big-endian: 0x3e800000, 0x3f000000, 0x3f400000, 0x3f800000
         # For little-endian: bytes are reversed within each float
-        expected_big = [0x3e, 0x80, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3f, 0x40, 0x00, 0x00, 0x3f, 0x80, 0x00, 0x00]
-        expected_little = [0x00, 0x00, 0x80, 0x3e, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x40, 0x3f, 0x00, 0x00, 0x80, 0x3f]
+        expected_big = [
+            0x3E,
+            0x80,
+            0x00,
+            0x00,
+            0x3F,
+            0x00,
+            0x00,
+            0x00,
+            0x3F,
+            0x40,
+            0x00,
+            0x00,
+            0x3F,
+            0x80,
+            0x00,
+            0x00,
+        ]
+        expected_little = [
+            0x00,
+            0x00,
+            0x80,
+            0x3E,
+            0x00,
+            0x00,
+            0x00,
+            0x3F,
+            0x00,
+            0x00,
+            0x40,
+            0x3F,
+            0x00,
+            0x00,
+            0x80,
+            0x3F,
+        ]
         if cfg.endian == Endian.BIG:
             self.assertEqual(bytes_list, expected_big)
         else:
             self.assertEqual(bytes_list, expected_little)
 
@@ -286,14 +324,14 @@
     def test_aarch64_little_endian_target(self):
         self.do_expr_eval("aarch64-le")
 
     # AArch64 doesn't seem to have implemented big-endian in lldb
     # Both big-endian and little-endian triples select the same ArchSpec.
-    #@skipIfXmlSupportMissing
-    #@skipIfRemote
-    #@skipIfLLVMTargetMissing("AArch64")
-    #def test_aarch64_big_endian(self):
+    # @skipIfXmlSupportMissing
+    # @skipIfRemote
+    # @skipIfLLVMTargetMissing("AArch64")
+    # def test_aarch64_big_endian(self):
     #    self.do_expr_eval("aarch64-be")
 
     @skipIfXmlSupportMissing
     @skipIfRemote
     @skipIfLLVMTargetMissing("PowerPC")
 | 
202ddf8    to
    3a260de      
    Compare
  
    There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // Get the vector element order for this architecture. This determines how | |
| /// Get the vector element order for this architecture. This determines how | 
aa98810    to
    3b52f3d      
    Compare
  
    | private: | ||
| static std::unique_ptr<Architecture> Create(const ArchSpec &arch); | ||
| ArchitecturePPC64() = default; | ||
| ArchitecturePPC64(lldb::ByteOrder vector_element_order) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why was PPC64 important to address as part of this PR? Is it because it's the only big-endian architecture plugin and you want to ensure we error out appropriately?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we split the architecture plugin changes into a separate PR?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not the only big-endian target but it is the only target I know of where the order of vector elements doesn't match LLVM-IR's order. MIPS and ARM both have big-endian modes but vectors are 0-element first in both endians whereas big-endian PowerPC is highest-indexed element first. If I hadn't handled this then we'd read/write their vectors in reversed element order every time we tried to copy memory to/from an LLVM-IR value.
MIPS and ARM's vector layout has a different quirk in the LLVM-IR/memory mapping, which is that bitcast isn't a no-op, it's a shuffle (which bytes it swaps depends on the types involved). This is because it's defined as a store of the original type followed by a load of the new type. I haven't implemented this yet because I didn't need to support vector bitcast.
| const Value *vector_operand = extract_inst->getVectorOperand(); | ||
| const Value *index_operand = extract_inst->getIndexOperand(); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We probably need nullptr checks here
| LLDB_LOGF(log, | ||
| "ExtractElement index %llu is out of bounds for vector with " | ||
| "%u elements", | ||
| (unsigned long long)index, num_elements); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| LLDB_LOGF(log, | |
| "ExtractElement index %llu is out of bounds for vector with " | |
| "%u elements", | |
| (unsigned long long)index, num_elements); | |
| LLDB_LOG(log, | |
| "ExtractElement index {0} is out of bounds for vector with " | |
| "{1} elements", | |
| index, num_elements); | 
| LLDB_LOGF(log, "Interpreted an ExtractElement"); | ||
| LLDB_LOGF(log, " Vector: 0x%" PRIx64, vector_addr); | ||
| LLDB_LOGF(log, " Index: %llu", (unsigned long long)index); | ||
| LLDB_LOGF(log, " Element offset: %zu", element_offset); | ||
| LLDB_LOGF(log, " Result: 0x%" PRIx64, result_addr); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you combine these into a single LLDB_LOG? You can replace the printf specifiers with {0}, {1}, etc. too
| } | ||
| } break; | ||
| case Instruction::ExtractElement: { | ||
| const ExtractElementInst *extract_inst = cast<ExtractElementInst>(inst); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you move all this into a separate helper function? To keep the function more readable
| default: | ||
| break; | ||
| case Type::FixedVectorTyID: | ||
| case Type::FixedVectorTyID: { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now that you allow vector types as operands to instructions, did you verify that the other instructions we support correctly get interpreted if the operand is a vector? From some skimming of the docs I see that, e.g., Bitcast requires some special handling for vector types.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't check bitcast because it didn't come up in the expressions I was adding. I'll check it has appropriate guards. That should be the only one that's a bit weird (it's only an issue for big-endian ARM and MIPS). For the other instructions I was mostly leaning on the test suite showing issues if I broke something. I can have a look through them
| // Calculate target offset based on element ordering | ||
| unsigned target_index = !reverse_elements ? i : (num_elements - 1 - i); | ||
| size_t offset = target_index * element_size; | ||
|  | ||
| lldb_private::Scalar element_scalar( | ||
| element_value.zextOrTrunc(element_size * 8)); | ||
| lldb_private::Status get_data_error; | ||
| if (!element_scalar.GetAsMemoryData(data_ptr + offset, element_size, | ||
| m_byte_order, get_data_error)) | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see we're duplicating some code between here and the Instruction::ExtractElement handling. Could we re-use some of that indexing logic?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For testing the endianness, I'm not sure we have PPC LLDB buildbots that would test this (@DavidSpickett or @JDevlieghere might know). Any chance this can be tested with unit-tests somehow? Maybe we can just pass an LLVM IR module into the IRInterpreter? We could potentially build something into lldb-test even. If that's too much hassle we can also just do that as a follow-up
| 
 I'm not sure we can. The problem arises from transferring LLVM-IR values to/from the target's memory. If we don't have the target consuming/producing that memory then our test could just be consistently wrong and pass anyway. | 
Some targets like PowerPC store their vector elements in an endian-dependent order:
- PowerPC little endian: little endian elements ordered 0, 1, 2, ...
- PowerPC big endian: big endian elements ordered n-1, n-2, n-3, ...
- ARM/MIPS little endian: little endian elements ordered 0, 1, 2, ...
- ARM/MIPS big endian: big endian elements ordered 0, 1, 2, ...
This matters when LLVM-IR values are transferred to/from target memory since LLVM-IR orders elements 0, 1, 2, ... regardless of endianness. This will be used in llvm#155000 by changes to the IRInterpreter to allow it to evaluate some vectors without executing on the target.
| 
 Debian will build PPC little endian once in a while and somewhere in the depths of IBM there might be a machine but I've never seen a bug report from one. Also not sure what endians they ever supported. ISTR Linux is only supported on LE now but AIX is BE only. | 
| s390x is the other big endian target, I have no idea what its vectors do. | 
| 
 We can simulate a gdb-remote that's one of these unique architectures. It could check that the memory writes contain the right order of elements. Sometimes it's not possible because we have to mock too much stuff, but you can look at  | 
Everything in this should be Python 3.9. The docs say the minimum is 3.8 but there's existing code in this suite that needs 3.9 so I think 3.9 is ok. Issues:
- qEcho() is passed an argument by the callers that the function didn't have.
- Several functions in the base class would silently do nothing if not overridden. These now use @abstractmethod to require overrides.
- sendall() had inconsistent return types between overrides.
While debugging the tests for PR155000 I found it helpful to have both sides of the simulated gdb-rsp traffic rather than just the responses so I've added a packetLog to MockGDBServer. The existing response-only one in MockGDBServerResponder is used by tests so I chose not to change it
Some targets like PowerPC store their vector elements in an endian-dependent order:
- PowerPC little endian: little endian elements ordered 0, 1, 2, ...
- PowerPC big endian: big endian elements ordered n-1, n-2, n-3, ...
- ARM/MIPS little endian: little endian elements ordered 0, 1, 2, ...
- ARM/MIPS big endian: big endian elements ordered 0, 1, 2, ...
This matters when LLVM-IR values are transferred to/from target memory since LLVM-IR orders elements 0, 1, 2, ... regardless of endianness. This will be used in llvm#155000 by changes to the IRInterpreter to allow it to evaluate some vectors without executing on the target.
This allows the debugger to evaluate expressions without requiring the
expression to be CodeGen'd and executed on the target. This should be more
efficient for many existing targets but is necessary for targets which are
not yet able to evaluate on the target.
In terms of memory layout, we have:
           | Element Values   | Element Order |
  ARM NEON | Endian-dependent | Zero-first    |
  MIPS MSA | Endian-dependent | Zero-first    |
  PowerPC  | Endian-dependent | Zero-first    |
  SystemZ  | Endian-dependent | Zero-first    |
Where Zero-first means that element zero of an array/vector is at the lowest
memory address.
In terms of register layout, we have:
           | Element Values   | Element Order     | Effective Element Order
  ARM NEON | Endian-dependent | Zero-first        | Zero-first
  MIPS MSA | Endian-dependent | Zero-first        | Zero-first
  PowerPC  | Endian-dependent | Endian-dependent* | Zero-first (lane-swaps LE)
  SystemZ  | Endian-dependent | Endian-dependent* | Zero-first (lane-swaps BE)
*HW is endian-dependent but CodeGen accounts for it
PowerPC is a little more complicated than shown above as it actually supports
two modes: True-LE and Big-on-Little and the above table shows True-LE's
behaviour. See
https://llvm.org/devmtg/2014-10/Slides/Schmidt-SupportingVectorProgramming.pdf
I haven't seen evidence that big-on-little is implemented yet so I haven't
attempted to account for it.
The end result of this is that transferring values between llvm-ir and memory
is consistent between the four targets but transferring values between llvm-ir
and registers potentially requires transformations. I've therefore:
* Redefined GetVectorElementOrder to refer to the register layout not memory
* Made Materializer/Dematerializer bail out when reading/writing vector values.
* Made bitcast bail out for the cases where a bitcast is a shuffle rather than
  a nop.
    3b52f3d    to
    9b846c0      
    Compare
  
    | 
 
 I've managed to implement this and it did catch bugs as well as a major misunderstanding. Along the way I found some unrelated issues in the infrastructure (#162172) and added a packetLog that captures both directions of traffic to make debugging easier #162176. Big-endian AArch64 does not seem to be implemented in lldb (both triples lead to the same ArchSpec) so I have left that case out of the test. The PowerPC side of things went down a bit of a rabbit hole. To cut a long story short, it turns out PowerPC is doing the same lane-swap-via-codegen trick as SystemZ but it does it for the little-endian case. The case I thought it was doing is called Big-on-Little (see https://llvm.org/devmtg/2014-10/Slides/Schmidt-SupportingVectorProgramming.pdf). It presumably is in use somewhere but I've found little information on it so I don't know how we'd detect it and it doesn't seem to be what clang/llvm does. This information led to a major correction to the code as all four targets map llvm-ir values to memory the same way now. The difference is in the llvm-ir <-> register side and so I've moved the checks to the Materializer/Dematerializer and made it fall back on evaluation via injected code when they come up. Since most of the code changed I squashed the commits together as we weren't getting much value from keeping them separate anymore. AFAIK that does cause some problems with github's review tools but it's probably better than leaving all the intermediate commits. | 
While debugging the tests for #155000 I found it helpful to have both sides of the simulated gdb-rsp traffic rather than just the responses so I've extended the packetLog in MockGDBServerResponder to record traffic in both directions. Tests have been updated accordingly
* [flang] Fix standalone build regression from llvm#161179 (llvm#164309) Fix incorrect linking and dependencies introduced in llvm#161179 that break standalone builds of Flang. Signed-off-by: Michał Górny <[email protected]> * [AMDGPU] Remove magic constants from V_PK_ADD_F32 pattern. NFC (llvm#164335) * [AMDGPU] Update code sequence for CU-mode Release Fences in GFX10+ (llvm#161638) They were previously optimized to not emit any waitcnt, which is technically correct because there is no reordering of operations at workgroup scope in CU mode for GFX10+. This breaks transitivity however, for example if we have the following sequence of events in one thread: - some stores - store atomic release syncscope("workgroup") - barrier then another thread follows with - barrier - load atomic acquire - store atomic release syncscope("agent") It does not work because, while the other thread sees the stores, it cannot release them at the wider scope. Our release fences aren't strong enough to "wait" on stores from other waves. We also cannot strengthen our release fences any further to allow for releasing other wave's stores because only GFX12 can do that with `global_wb`. GFX10-11 do not have the writeback instruction. It'd also add yet another level of complexity to code sequences, with both acquire/release having CU-mode only alternatives. Lastly, acq/rel are always used together. The price for synchronization has to be paid either at the acq, or the rel. Strengthening the releases would just make the memory model more complex but wouldn't help performance. So the choice here is to streamline the code sequences by making CU and WGP mode emit almost identical (vL0 inv is not needed in CU mode) code for release (or stronger) atomic ordering. This also removes the `vm_vsrc(0)` wait before barriers. Now that the release fence in CU mode is strong enough, it is no longer needed. 
Supersedes llvm#160501 Solves SC1-6454 * [InstSimplify] Support ptrtoaddr in simplifyGEPInst() (llvm#164262) This adds support for ptrtoaddr in the `ptradd p, ptrtoaddr(p2) - ptrtoaddr(p) -> p2` fold. This fold requires that p and p2 have the same underlying object (otherwise the provenance may not be the same). The argument I would like to make here is that because the underlying objects are the same (and the pointers in the same address space), the non-address bits of the pointer must be the same. Looking at some specific cases of underlying object relationship: * phi/select: Trivially true. * getelementptr: Only modifies address bits, non-address bits must remain the same. * addrspacecast round-trip cast: Must preserve all bits because we optimize such round-trip casts away. * non-interposable global alias: I'm a bit unsure about this one, but I guess the alias and the aliasee must have the same non-address bits? * various intrinsics like launder.invariant.group, ptrmask. I think these all either preserve all pointer bits (like the invariant.group ones) or at least the non-address bits (like ptrmask). There are some interesting cases like amdgcn.make.buffer.rsrc, but those are cross address-space. ----- There is a second `gep (gep p, C), (sub 0, ptrtoint(p)) -> C` transform in this function, which I am not extending to handle ptrtoaddr, adding negative tests instead. This transform is overall dubious for provenance reasons, but especially dubious with ptrtoaddr, as then we don't have the guarantee that provenance of `p` has been exposed. * [Hexagon] Add REQUIRES: asserts to test This test uses -debug-only, so needs an assertion-enabled build. * [AArch64] Combing scalar_to_reg into DUP if the DUP already exists (llvm#160499) If we already have a dup(x) as part of the DAG along with a scalar_to_vec(x), we can re-use the result of the dup to the scalar_to_vec(x). * [CAS] OnDiskGraphDB - fix MSVC "not all control paths return a value" warnings. NFC. 
(llvm#164369) * Reapply "[libc++] Optimize __hash_table::erase(iterator, iterator)" (llvm#162850) This reapplication fixes the use after free caused by not properly updating the bucket list in one case. Original commit message: Instead of just calling the single element `erase` on every element of the range, we can combine some of the operations in a custom implementation. Specifically, we don't need to search for the previous node or re-link the list every iteration. Removing this unnecessary work results in some nice performance improvements: ``` ----------------------------------------------------------------------------------------------------------------------- Benchmark old new ----------------------------------------------------------------------------------------------------------------------- std::unordered_set<int>::erase(iterator, iterator) (erase half the container)/0 457 ns 459 ns std::unordered_set<int>::erase(iterator, iterator) (erase half the container)/32 995 ns 626 ns std::unordered_set<int>::erase(iterator, iterator) (erase half the container)/1024 18196 ns 7995 ns std::unordered_set<int>::erase(iterator, iterator) (erase half the container)/8192 124722 ns 70125 ns std::unordered_set<std::string>::erase(iterator, iterator) (erase half the container)/0 456 ns 461 ns std::unordered_set<std::string>::erase(iterator, iterator) (erase half the container)/32 1183 ns 769 ns std::unordered_set<std::string>::erase(iterator, iterator) (erase half the container)/1024 27827 ns 18614 ns std::unordered_set<std::string>::erase(iterator, iterator) (erase half the container)/8192 266681 ns 226107 ns std::unordered_map<int, int>::erase(iterator, iterator) (erase half the container)/0 455 ns 462 ns std::unordered_map<int, int>::erase(iterator, iterator) (erase half the container)/32 996 ns 659 ns std::unordered_map<int, int>::erase(iterator, iterator) (erase half the container)/1024 15963 ns 8108 ns std::unordered_map<int, int>::erase(iterator, iterator) (erase 
half the container)/8192 136493 ns 71848 ns std::unordered_multiset<int>::erase(iterator, iterator) (erase half the container)/0 454 ns 455 ns std::unordered_multiset<int>::erase(iterator, iterator) (erase half the container)/32 985 ns 703 ns std::unordered_multiset<int>::erase(iterator, iterator) (erase half the container)/1024 16277 ns 9085 ns std::unordered_multiset<int>::erase(iterator, iterator) (erase half the container)/8192 125736 ns 82710 ns std::unordered_multimap<int, int>::erase(iterator, iterator) (erase half the container)/0 457 ns 454 ns std::unordered_multimap<int, int>::erase(iterator, iterator) (erase half the container)/32 1091 ns 646 ns std::unordered_multimap<int, int>::erase(iterator, iterator) (erase half the container)/1024 17784 ns 7664 ns std::unordered_multimap<int, int>::erase(iterator, iterator) (erase half the container)/8192 127098 ns 72806 ns ``` This reverts commit acc3a62. * [TableGen] List the indices of sub-operands (llvm#163723) Some instances of the `Operand` class used in Tablegen instruction definitions expand to a cluster of multiple operands at the MC layer, such as complex addressing modes involving base + offset + shift, or clusters of operands describing conditional Arm instructions or predicated MVE instructions. There's currently no convenient way for C++ code to know the offset of one of those sub-operands from the start of the cluster: instead it just hard-codes magic numbers like `index+2`, which is hard to read and fragile. This patch adds an extra piece of output to `InstrInfoEmitter` to define those instruction offsets, based on the name of the `Operand` class instance in Tablegen, and the names assigned to the sub-operands in the `MIOperandInfo` field. For example, if target Foo were to define def Bar : Operand { let MIOperandInfo = (ops GPR:$first, i32imm:$second); // ... } then the new constants would be `Foo::SUBOP_Bar_first` and `Foo::SUBOP_Bar_second`, defined as 0 and 1 respectively. 
As an example, I've converted some magic numbers related to the MVE predication operand types (`vpred_n` and its superset `vpred_r`) to use the new named constants in place of the integer literals they previously used. This is more verbose, but also clearer, because it explains why the integer is chosen instead of what its value is. * [lldb] Add bidirectional packetLog to gdbclientutils.py (llvm#162176) While debugging the tests for llvm#155000 I found it helpful to have both sides of the simulated gdb-rsp traffic rather than just the responses so I've extended the packetLog in MockGDBServerResponder to record traffic in both directions. Tests have been updated accordingly * [MLIR] [Vector] Added canonicalizer for folding from_elements + transpose (llvm#161841) ## Description Adds a new canonicalizer that folds `vector.from_elements(vector.transpose))` => `vector.from_elements`. This canonicalization reorders the input elements for `vector.from_elements`, adjusts the output shape to match the effect of the transpose op and eliminating its need. ## Testing Added a 2D vector lit test that verifies the working of the rewrite. --------- Signed-off-by: Keshav Vinayak Jha <[email protected]> * [DA] Add initial support for monotonicity check (llvm#162280) The dependence testing functions in DA assume that the analyzed AddRec does not wrap over the entire iteration space. For AddRecs that may wrap, DA should conservatively return unknown dependence. However, no validation is currently performed to ensure that this condition holds, which can lead to incorrect results in some cases. This patch introduces the notion of *monotonicity* and a validation logic to check whether a SCEV is monotonic. The monotonicity check classifies the SCEV into one of the following categories: - Unknown: Nothing is known about the monotonicity of the SCEV. - Invariant: The SCEV is loop-invariant. 
- MultivariateSignedMonotonic: The SCEV doesn't wrap in a signed sense for any iteration of the loops in the loop nest. The current validation logic basically searches an affine AddRec recursively and checks whether the `nsw` flag is present. Notably, it is still unclear whether we should also have a category for unsigned monotonicity. The monotonicity check is still under development and disabled by default for now. Since such a check is necessary to make DA sound, it should be enabled by default once the functionality is sufficient. Split off from llvm#154527. * [VPlan] Use VPlan::getRegion to shorten code (NFC) (llvm#164287) * [VPlan] Improve code using m_APInt (NFC) (llvm#161683) * [SystemZ] Avoid trunc(add(X,X)) patterns (llvm#164378) Replace with trunc(add(X,Y)) to avoid premature folding in upcoming patch llvm#164227 * [clang][CodeGen] Emit `llvm.tbaa.errno` metadata during module creation Let Clang emit `llvm.tbaa.errno` metadata in order to let LLVM carry out optimizations around errno-writing libcalls to, as long as it is proved the involved memory location does not alias `errno`. Previous discussion: https://discourse.llvm.org/t/rfc-modelling-errno-memory-effects/82972. * [LV][NFC] Remove undef from phi incoming values (llvm#163762) Split off from PR llvm#163525, this standalone patch replaces use of undef as incoming PHI values with zero, in order to reduce the likelihood of contributors hitting the `undef deprecator` warning in github. * [DA] Add option to enable specific dependence test only (llvm#164245) PR llvm#157084 added an option `da-run-siv-routines-only` to run only SIV routines in DA. This PR replaces that option with a more fine-grained one that allows to select other than SIV routines as well. This option is useful for regression testing of individual DA routines. This patch also reorganizes regression tests that use `da-run-siv-routines-only`. * [libcxx] Optimize `std::generate_n` for segmented iterators (llvm#164266) Part of llvm#102817. 
This is a natural follow-up to llvm#163006. We are forwarding `std::generate_n` to `std::__for_each_n` (`std::for_each_n` needs c++17), resulting in improved performance for segmented iterators. before: ``` std::generate_n(deque<int>)/32 17.5 ns 17.3 ns 40727273 std::generate_n(deque<int>)/50 25.7 ns 25.5 ns 26352941 std::generate_n(deque<int>)/1024 490 ns 487 ns 1445161 std::generate_n(deque<int>)/8192 3908 ns 3924 ns 179200 ``` after: ``` std::generate_n(deque<int>)/32 11.1 ns 11.0 ns 64000000 std::generate_n(deque<int>)/50 16.1 ns 16.0 ns 44800000 std::generate_n(deque<int>)/1024 291 ns 292 ns 2357895 std::generate_n(deque<int>)/8192 2269 ns 2250 ns 298667 ``` * [BOLT] Check entry point address is not in constant island (llvm#163418) There are cases where `addEntryPointAtOffset` is called with a given `Offset` that points to an address within a constant island. This triggers `assert(!isInConstantIsland(EntryPointAddress)` and causes BOLT to crash. This patch adds a check which ignores functions that would add such entry points and warns the user. * [llvm][dwarfdump] Pretty-print DW_AT_language_version (llvm#164222) In both verbose and non-verbose mode we will now use the `llvm::dwarf::LanguageDescription` to turn the version into a human readable string. In verbose mode we also display the raw version code (similar to how we display addresses in verbose mode). To make the version code and prettified easier to distinguish, we print the prettified name in colour (if available), which is consistent with how `DW_AT_language` is printed in colour. 
Before: ``` 0x0000000c: DW_TAG_compile_unit DW_AT_language_name (DW_LNAME_C) DW_AT_language_version (201112) ``` After: ``` 0x0000000c: DW_TAG_compile_unit DW_AT_language_name (DW_LNAME_C) DW_AT_language_version (201112 C11) ``` --------- Signed-off-by: Michał Górny <[email protected]> Signed-off-by: Keshav Vinayak Jha <[email protected]> Co-authored-by: Michał Górny <[email protected]> Co-authored-by: Stanislav Mekhanoshin <[email protected]> Co-authored-by: Pierre van Houtryve <[email protected]> Co-authored-by: Nikita Popov <[email protected]> Co-authored-by: David Green <[email protected]> Co-authored-by: Simon Pilgrim <[email protected]> Co-authored-by: Nikolas Klauser <[email protected]> Co-authored-by: Simon Tatham <[email protected]> Co-authored-by: Daniel Sanders <[email protected]> Co-authored-by: Keshav Vinayak Jha <[email protected]> Co-authored-by: Ryotaro Kasuga <[email protected]> Co-authored-by: Ramkumar Ramachandra <[email protected]> Co-authored-by: Antonio Frighetto <[email protected]> Co-authored-by: David Sherwood <[email protected]> Co-authored-by: Connector Switch <[email protected]> Co-authored-by: Asher Dobrescu <[email protected]> Co-authored-by: Michael Buch <[email protected]>
While debugging the tests for llvm#155000 I found it helpful to have both sides of the simulated gdb-rsp traffic rather than just the responses so I've extended the packetLog in MockGDBServerResponder to record traffic in both directions. Tests have been updated accordingly
While debugging the tests for llvm#155000 I found it helpful to have both sides of the simulated gdb-rsp traffic rather than just the responses so I've extended the packetLog in MockGDBServerResponder to record traffic in both directions. Tests have been updated accordingly
This allows the debugger to evaluate expressions without requiring the
expression to be CodeGen'd and executed on the target. This should be more
efficient for many existing targets but is necessary for targets which are
not yet able to evaluate on the target.
As far as I know most targets have a vector memory layout that matches the
IR element order. Most little endian targets choose to use a little endian
element order, and two out of the three big endian targets I know of
(MIPS MSA and ARM NEON) choose to use little endian element order even
when the elements are big endian which matches LLVM-IR's order. The third
is PowerPC Altivec which has the highest indexed element first for
big-endian mode.
I've attempted to implement the correct element ordering on the relevant
operations but I don't really have a means to test the case where the
element order doesn't match LLVM-IR's element order so I've chosen to have
a guard against element order mismatches to ensure that this change can't
break expression evaluation on those targets.