Skip to content

Conversation

@tbaederr
Copy link
Contributor

No description provided.

@llvmbot llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:bytecode Issues for the clang bytecode constexpr interpreter labels Aug 14, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 14, 2025

@llvm/pr-subscribers-clang

Author: Timm Baeder (tbaederr)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/153601.diff

6 Files Affected:

  • (modified) clang/lib/AST/ByteCode/Compiler.cpp (+41-11)
  • (modified) clang/lib/AST/ByteCode/Compiler.h (+2)
  • (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+93-14)
  • (modified) clang/lib/AST/ByteCode/Program.cpp (+2-2)
  • (added) clang/test/AST/ByteCode/builtin-object-size-codegen.cpp (+53)
  • (modified) clang/test/Sema/builtin-object-size.c (+4)
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 8e651cf060620..d0e1a1ca0f7de 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -60,16 +60,18 @@ template <class Emitter> class OptionScope final {
 public:
   /// Root constructor, compiling or discarding primitives.
   OptionScope(Compiler<Emitter> *Ctx, bool NewDiscardResult,
-              bool NewInitializing)
+              bool NewInitializing, bool NewToLValue)
       : Ctx(Ctx), OldDiscardResult(Ctx->DiscardResult),
-        OldInitializing(Ctx->Initializing) {
+        OldInitializing(Ctx->Initializing), OldToLValue(NewToLValue) {
     Ctx->DiscardResult = NewDiscardResult;
     Ctx->Initializing = NewInitializing;
+    Ctx->ToLValue = NewToLValue;
   }
 
   ~OptionScope() {
     Ctx->DiscardResult = OldDiscardResult;
     Ctx->Initializing = OldInitializing;
+    Ctx->ToLValue = OldToLValue;
   }
 
 private:
@@ -78,6 +80,7 @@ template <class Emitter> class OptionScope final {
   /// Old discard flag to restore.
   bool OldDiscardResult;
   bool OldInitializing;
+  bool OldToLValue;
 };
 
 template <class Emitter>
@@ -222,6 +225,9 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
 
   switch (CE->getCastKind()) {
   case CK_LValueToRValue: {
+    if (ToLValue && CE->getType()->isPointerType())
+      return this->delegate(SubExpr);
+
     if (SubExpr->getType().isVolatileQualified())
       return this->emitInvalidCast(CastKind::Volatile, /*Fatal=*/true, CE);
 
@@ -4140,13 +4146,13 @@ bool Compiler<Emitter>::VisitStmtExpr(const StmtExpr *E) {
 
 template <class Emitter> bool Compiler<Emitter>::discard(const Expr *E) {
   OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/true,
-                             /*NewInitializing=*/false);
+                             /*NewInitializing=*/false, /*ToLValue=*/false);
   return this->Visit(E);
 }
 
 template <class Emitter> bool Compiler<Emitter>::delegate(const Expr *E) {
   // We're basically doing:
-  // OptionScope<Emitter> Scope(this, DicardResult, Initializing);
+  // OptionScope<Emitter> Scope(this, DicardResult, Initializing, ToLValue);
   // but that's unnecessary of course.
   return this->Visit(E);
 }
@@ -4174,7 +4180,7 @@ template <class Emitter> bool Compiler<Emitter>::visit(const Expr *E) {
   //  Otherwise,we have a primitive return value, produce the value directly
   //  and push it on the stack.
   OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/false,
-                             /*NewInitializing=*/false);
+                             /*NewInitializing=*/false, /*ToLValue=*/ToLValue);
   return this->Visit(E);
 }
 
@@ -4183,7 +4189,13 @@ bool Compiler<Emitter>::visitInitializer(const Expr *E) {
   assert(!canClassify(E->getType()));
 
   OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/false,
-                             /*NewInitializing=*/true);
+                             /*NewInitializing=*/true, /*ToLValue=*/false);
+  return this->Visit(E);
+}
+
+template <class Emitter> bool Compiler<Emitter>::visitAsLValue(const Expr *E) {
+  OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/false,
+                             /*NewInitializing=*/false, /*ToLValue=*/true);
   return this->Visit(E);
 }
 
@@ -4944,7 +4956,6 @@ bool Compiler<Emitter>::visitAPValueInitializer(const APValue &Val,
 template <class Emitter>
 bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E,
                                              unsigned BuiltinID) {
-
   if (BuiltinID == Builtin::BI__builtin_constant_p) {
     // Void argument is always invalid and harder to handle later.
     if (E->getArg(0)->getType()->isVoidType()) {
@@ -4989,12 +5000,31 @@ bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E,
       return false;
   }
 
-  if (!Context::isUnevaluatedBuiltin(BuiltinID)) {
-    // Put arguments on the stack.
-    for (const auto *Arg : E->arguments()) {
-      if (!this->visit(Arg))
+  // Prepare function arguments including special cases.
+  switch (BuiltinID) {
+  case Builtin::BI__builtin_object_size:
+  case Builtin::BI__builtin_dynamic_object_size: {
+    assert(E->getNumArgs() == 2);
+    if (E->getArg(0)->isGLValue()) {
+      if (!this->visit(E->getArg(0)))
+        return false;
+
+    } else {
+      if (!this->visitAsLValue(E->getArg(0)))
         return false;
     }
+    if (!this->visit(E->getArg(1)))
+      return false;
+
+  } break;
+  default:
+    if (!Context::isUnevaluatedBuiltin(BuiltinID)) {
+      // Put arguments on the stack.
+      for (const auto *Arg : E->arguments()) {
+        if (!this->visit(Arg))
+          return false;
+      }
+    }
   }
 
   if (!this->emitCallBI(E, BuiltinID, E))
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h
index 901934e530ade..20571df0432f9 100644
--- a/clang/lib/AST/ByteCode/Compiler.h
+++ b/clang/lib/AST/ByteCode/Compiler.h
@@ -282,6 +282,7 @@ class Compiler : public ConstStmtVisitor<Compiler<Emitter>, bool>,
   /// been created. visitInitializer() then relies on a pointer to this
   /// variable being on top of the stack.
   bool visitInitializer(const Expr *E);
+  bool visitAsLValue(const Expr *E);
   /// Evaluates an expression for side effects and discards the result.
   bool discard(const Expr *E);
   /// Just pass evaluation on to \p E. This leaves all the parsing flags
@@ -426,6 +427,7 @@ class Compiler : public ConstStmtVisitor<Compiler<Emitter>, bool>,
   bool DiscardResult = false;
 
   bool InStmtExpr = false;
+  bool ToLValue = false;
 
   /// Flag inidicating if we're initializing an already created
   /// variable. This is set in visitInitializer().
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b602b9731a6e3..554eb366d14f5 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2170,8 +2170,8 @@ static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
   return true;
 }
 
-static unsigned computeFullDescSize(const ASTContext &ASTCtx,
-                                    const Descriptor *Desc) {
+static std::optional<unsigned> computeFullDescSize(const ASTContext &ASTCtx,
+                                                   const Descriptor *Desc) {
 
   if (Desc->isPrimitive())
     return ASTCtx.getTypeSizeInChars(Desc->getType()).getQuantity();
@@ -2183,8 +2183,7 @@ static unsigned computeFullDescSize(const ASTContext &ASTCtx,
   if (Desc->isRecord())
     return ASTCtx.getTypeSizeInChars(Desc->getType()).getQuantity();
 
-  llvm_unreachable("Unhandled descriptor type");
-  return 0;
+  return std::nullopt;
 }
 
 static unsigned computePointerOffset(const ASTContext &ASTCtx,
@@ -2192,7 +2191,7 @@ static unsigned computePointerOffset(const ASTContext &ASTCtx,
   unsigned Result = 0;
 
   Pointer P = Ptr;
-  while (P.isArrayElement() || P.isField()) {
+  while (P.isField() || P.isArrayElement()) {
     P = P.expand();
     const Descriptor *D = P.getFieldDesc();
 
@@ -2205,7 +2204,6 @@ static unsigned computePointerOffset(const ASTContext &ASTCtx,
         Result += ElemSize * P.getIndex();
       P = P.expand().getArray();
     } else if (P.isBaseClass()) {
-
       const auto *RD = cast<CXXRecordDecl>(D->asDecl());
       bool IsVirtual = Ptr.isVirtualBaseClass();
       P = P.getBase();
@@ -2234,30 +2232,111 @@ static unsigned computePointerOffset(const ASTContext &ASTCtx,
   return Result;
 }
 
+static bool pointsToLastObject(const Pointer &Ptr) {
+  Pointer P = Ptr;
+  while (!P.isRoot()) {
+
+    if (P.isArrayElement()) {
+      P = P.expand().getArray();
+      continue;
+    } else if (P.isBaseClass()) {
+      P = P.getBase();
+      continue;
+    }
+
+    Pointer Base = P.getBase();
+    if (const Record *R = Base.getRecord()) {
+      assert(P.getField());
+      if (P.getField()->getFieldIndex() != R->getNumFields() - 1)
+        return false;
+    }
+    P = Base;
+  }
+
+  return true;
+}
+
+/// Does Ptr point to the last object AND to a flexible array member?
+static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const Pointer &Ptr) {
+  auto isFlexibleArrayMember = [&](const Descriptor *FieldDesc) {
+    using FAMKind = LangOptions::StrictFlexArraysLevelKind;
+    FAMKind StrictFlexArraysLevel =
+        Ctx.getLangOpts().getStrictFlexArraysLevel();
+
+    if (StrictFlexArraysLevel == FAMKind::Default)
+      return true;
+
+    unsigned NumElems = FieldDesc->getNumElems();
+    if (NumElems == 0 && StrictFlexArraysLevel != FAMKind::IncompleteOnly)
+      return true;
+
+    if (NumElems == 1 && StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete)
+      return true;
+    return false;
+  };
+
+  const Descriptor *FieldDesc = Ptr.getFieldDesc();
+  if (!FieldDesc->isArray())
+    return false;
+
+  return Ptr.isDummy() && pointsToLastObject(Ptr) &&
+         isFlexibleArrayMember(FieldDesc);
+}
+
 static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC,
                                         const InterpFrame *Frame,
                                         const CallExpr *Call) {
   PrimType KindT = *S.getContext().classify(Call->getArg(1));
+  // From the GCC docs:
+  // Kind is an integer constant from 0 to 3. If the least significant bit is
+  // clear, objects are whole variables. If it is set, a closest surrounding
+  // subobject is considered the object a pointer points to. The second bit
+  // determines if maximum or minimum of remaining bytes is computed.
   [[maybe_unused]] unsigned Kind = popToAPSInt(S.Stk, KindT).getZExtValue();
 
   assert(Kind <= 3 && "unexpected kind");
-
+  bool UseFieldDesc = (Kind & 1u);
+  bool ReportMinimum = (Kind & 2u);
   const Pointer &Ptr = S.Stk.pop<Pointer>();
+  const ASTContext &ASTCtx = S.getASTContext();
 
-  if (Ptr.isZero())
+  if (Ptr.isZero() || !Ptr.isBlockPointer())
     return false;
 
-  const Descriptor *DeclDesc = Ptr.getDeclDesc();
-  if (!DeclDesc)
+  // We can't load through pointers.
+  if (Ptr.isDummy() && Ptr.getType()->isPointerType())
     return false;
 
-  const ASTContext &ASTCtx = S.getASTContext();
+  bool DetermineForCompleteObject = Ptr.getFieldDesc() == Ptr.getDeclDesc();
 
-  unsigned ByteOffset = computePointerOffset(ASTCtx, Ptr);
-  unsigned FullSize = computeFullDescSize(ASTCtx, DeclDesc);
+  if (ReportMinimum) {
+    if (!DetermineForCompleteObject && !UseFieldDecl && Ptr.isDummy())
+      return false;
+  } else {
+    if (!DetermineForCompleteObject &&
+        isUserWritingOffTheEnd(ASTCtx, Ptr.expand()))
+      return false;
+  }
 
-  pushInteger(S, FullSize - ByteOffset, Call->getType());
+  const Descriptor *Desc =
+      UseFieldDesc ? Ptr.getFieldDesc() : Ptr.getDeclDesc();
+  assert(Desc);
 
+  std::optional<unsigned> FullSize = computeFullDescSize(ASTCtx, Desc);
+  if (!FullSize)
+    return false;
+
+  unsigned ByteOffset;
+  if (UseFieldDesc && !Ptr.isBaseClass())
+    ByteOffset = computePointerOffset(ASTCtx, Ptr) -
+                 computePointerOffset(ASTCtx, Ptr.expand().atIndex(0).narrow());
+  else
+    ByteOffset = computePointerOffset(ASTCtx, Ptr);
+
+  assert(ByteOffset <= *FullSize);
+  unsigned Result = *FullSize - ByteOffset;
+
+  pushInteger(S, Result, Call->getType());
   return true;
 }
 
diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp
index 2843b325fe025..749ae2510612c 100644
--- a/clang/lib/AST/ByteCode/Program.cpp
+++ b/clang/lib/AST/ByteCode/Program.cpp
@@ -164,8 +164,8 @@ unsigned Program::getOrCreateDummy(const DeclTy &D) {
     const auto *VD = cast<ValueDecl>(cast<const Decl *>(D));
     IsWeak = VD->isWeak();
     QT = VD->getType();
-    if (const auto *RT = QT->getAs<ReferenceType>())
-      QT = RT->getPointeeType();
+    if (QT->isPointerOrReferenceType())
+      QT = QT->getPointeeType();
   }
   assert(!QT.isNull());
 
diff --git a/clang/test/AST/ByteCode/builtin-object-size-codegen.cpp b/clang/test/AST/ByteCode/builtin-object-size-codegen.cpp
new file mode 100644
index 0000000000000..4e0e087245d72
--- /dev/null
+++ b/clang/test/AST/ByteCode/builtin-object-size-codegen.cpp
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -triple x86_64-apple-darwin -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1                                         -triple x86_64-apple-darwin -emit-llvm -o - %s | FileCheck %s
+
+void foo() {
+  struct A { char buf[16]; };
+  struct B : A {};
+  struct C { int i; B bs[1]; } *c;
+
+  int gi;
+  // CHECK: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+  gi = __builtin_object_size(&c->bs[0], 0);
+  // CHECK: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+  gi = __builtin_object_size(&c->bs[0], 1);
+  // CHECK: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false)
+  gi = __builtin_object_size(&c->bs[0], 2);
+  // CHECK: store i32 16
+  gi = __builtin_object_size(&c->bs[0], 3);
+}
+
+
+void foo2() {
+  struct A { int a; };
+  struct B { int b; };
+  struct C: public A, public B {};
+
+  C c;
+
+  int gi;
+  // CHECK: store i32 8
+  gi = __builtin_object_size(&c, 0);
+  // CHECK: store i32 8
+  gi = __builtin_object_size((A*)&c, 0);
+  // CHECK: store i32 4
+  gi = __builtin_object_size((B*)&c, 0);
+
+  // CHECK: store i32 8
+  gi = __builtin_object_size((char*)&c, 0);
+  // CHECK: store i32 8
+  gi = __builtin_object_size((char*)(A*)&c, 0);
+  // CHECK: store i32 4
+  gi = __builtin_object_size((char*)(B*)&c, 0);
+}
+
+
+typedef struct {
+  double c[0];
+  float f;
+} foofoo0_t;
+
+unsigned babar0(foofoo0_t *f) {
+  // CHECK: ret i32 0
+  return __builtin_object_size(f->c, 1);
+}
diff --git a/clang/test/Sema/builtin-object-size.c b/clang/test/Sema/builtin-object-size.c
index 20d4e2ab6da79..a763c24fd6620 100644
--- a/clang/test/Sema/builtin-object-size.c
+++ b/clang/test/Sema/builtin-object-size.c
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-darwin9 -verify %s
 // RUN: %clang_cc1 -DDYNAMIC -fsyntax-only -triple x86_64-apple-darwin9 -verify %s
 
+// RUN: %clang_cc1 -fsyntax-only -verify %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-darwin9 -verify %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -DDYNAMIC -fsyntax-only -triple x86_64-apple-darwin9 -verify %s -fexperimental-new-constant-interpreter
+
 #ifndef DYNAMIC
 #define OBJECT_SIZE_BUILTIN __builtin_object_size
 #else

@tbaederr tbaederr force-pushed the bos branch 5 times, most recently from ac4dfd3 to c746da2 Compare August 18, 2025 08:03
@tbaederr tbaederr merged commit 0d05c42 into llvm:main Aug 18, 2025
9 checks passed
Copy link
Collaborator

@shafik shafik left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will come back to this

bool NewInitializing, bool NewToLValue)
: Ctx(Ctx), OldDiscardResult(Ctx->DiscardResult),
OldInitializing(Ctx->Initializing) {
OldInitializing(Ctx->Initializing), OldToLValue(NewToLValue) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should you be using Ctx->ToLValue to initialize OldToLValue?


Pointer P = Ptr;
while (P.isArrayElement() || P.isField()) {
while (P.isField() || P.isArrayElement()) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you switched the order b/d isField is way more likely to be true and short circuit? If so that deserve a comment for future folks to keep in mind.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the previous order just caused some assertion failures in isArrayElement IIRC. This version works for more cases.

// Kind is an integer constant from 0 to 3. If the least significant bit is
// clear, objects are whole variables. If it is set, a closest surrounding
// subobject is considered the object a pointer points to. The second bit
// determines if maximum or minimum of remaining bytes is computed.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It unhelpfully does not tell you whether set of unset means maximum. I am guessing you had to derive that empirically. It would be nice if the documentation was more explicit.

if (Ptr.isZero())
if (Call->getArg(0)->HasSideEffects(ASTCtx)) {
// "If there are any side effects in them, it returns (size_t) -1
// for type 0 or 1 and (size_t) 0 for type 2 or 3."
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe instead of calling Kind you could call it Type b/c you edited the gcc documentation previously but failed to do so here. I think being more consistent w/ the documentation would help in future.

tbaederr added a commit to tbaederr/llvm-project that referenced this pull request Aug 24, 2025
Initialize the `OldToLValue` member with the actual old value of
`ToLValue`.

Pointed out by Shafik in llvm#153601 (comment)
tbaederr added a commit that referenced this pull request Aug 25, 2025
Initialize the `OldToLValue` member with the actual old value of
`ToLValue`.

Pointed out by Shafik in
#153601 (comment)
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Aug 25, 2025
Initialize the `OldToLValue` member with the actual old value of
`ToLValue`.

Pointed out by Shafik in
llvm/llvm-project#153601 (comment)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

clang:bytecode Issues for the clang bytecode constexpr interpreter clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants