-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[clang][bytecode] Improve __builtin_{,dynamic_}object_size implementation #153601
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-clang Author: Timm Baeder (tbaederr) ChangesFull diff: https://github.com/llvm/llvm-project/pull/153601.diff 6 Files Affected:
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 8e651cf060620..d0e1a1ca0f7de 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -60,16 +60,18 @@ template <class Emitter> class OptionScope final {
public:
/// Root constructor, compiling or discarding primitives.
OptionScope(Compiler<Emitter> *Ctx, bool NewDiscardResult,
- bool NewInitializing)
+ bool NewInitializing, bool NewToLValue)
: Ctx(Ctx), OldDiscardResult(Ctx->DiscardResult),
- OldInitializing(Ctx->Initializing) {
+ OldInitializing(Ctx->Initializing), OldToLValue(NewToLValue) {
Ctx->DiscardResult = NewDiscardResult;
Ctx->Initializing = NewInitializing;
+ Ctx->ToLValue = NewToLValue;
}
~OptionScope() {
Ctx->DiscardResult = OldDiscardResult;
Ctx->Initializing = OldInitializing;
+ Ctx->ToLValue = OldToLValue;
}
private:
@@ -78,6 +80,7 @@ template <class Emitter> class OptionScope final {
/// Old discard flag to restore.
bool OldDiscardResult;
bool OldInitializing;
+ bool OldToLValue;
};
template <class Emitter>
@@ -222,6 +225,9 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
switch (CE->getCastKind()) {
case CK_LValueToRValue: {
+ if (ToLValue && CE->getType()->isPointerType())
+ return this->delegate(SubExpr);
+
if (SubExpr->getType().isVolatileQualified())
return this->emitInvalidCast(CastKind::Volatile, /*Fatal=*/true, CE);
@@ -4140,13 +4146,13 @@ bool Compiler<Emitter>::VisitStmtExpr(const StmtExpr *E) {
template <class Emitter> bool Compiler<Emitter>::discard(const Expr *E) {
OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/true,
- /*NewInitializing=*/false);
+ /*NewInitializing=*/false, /*ToLValue=*/false);
return this->Visit(E);
}
template <class Emitter> bool Compiler<Emitter>::delegate(const Expr *E) {
// We're basically doing:
- // OptionScope<Emitter> Scope(this, DicardResult, Initializing);
+ // OptionScope<Emitter> Scope(this, DicardResult, Initializing, ToLValue);
// but that's unnecessary of course.
return this->Visit(E);
}
@@ -4174,7 +4180,7 @@ template <class Emitter> bool Compiler<Emitter>::visit(const Expr *E) {
// Otherwise,we have a primitive return value, produce the value directly
// and push it on the stack.
OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/false,
- /*NewInitializing=*/false);
+ /*NewInitializing=*/false, /*ToLValue=*/ToLValue);
return this->Visit(E);
}
@@ -4183,7 +4189,13 @@ bool Compiler<Emitter>::visitInitializer(const Expr *E) {
assert(!canClassify(E->getType()));
OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/false,
- /*NewInitializing=*/true);
+ /*NewInitializing=*/true, /*ToLValue=*/false);
+ return this->Visit(E);
+}
+
+template <class Emitter> bool Compiler<Emitter>::visitAsLValue(const Expr *E) {
+ OptionScope<Emitter> Scope(this, /*NewDiscardResult=*/false,
+ /*NewInitializing=*/false, /*ToLValue=*/true);
return this->Visit(E);
}
@@ -4944,7 +4956,6 @@ bool Compiler<Emitter>::visitAPValueInitializer(const APValue &Val,
template <class Emitter>
bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinID) {
-
if (BuiltinID == Builtin::BI__builtin_constant_p) {
// Void argument is always invalid and harder to handle later.
if (E->getArg(0)->getType()->isVoidType()) {
@@ -4989,12 +5000,31 @@ bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E,
return false;
}
- if (!Context::isUnevaluatedBuiltin(BuiltinID)) {
- // Put arguments on the stack.
- for (const auto *Arg : E->arguments()) {
- if (!this->visit(Arg))
+ // Prepare function arguments including special cases.
+ switch (BuiltinID) {
+ case Builtin::BI__builtin_object_size:
+ case Builtin::BI__builtin_dynamic_object_size: {
+ assert(E->getNumArgs() == 2);
+ if (E->getArg(0)->isGLValue()) {
+ if (!this->visit(E->getArg(0)))
+ return false;
+
+ } else {
+ if (!this->visitAsLValue(E->getArg(0)))
return false;
}
+ if (!this->visit(E->getArg(1)))
+ return false;
+
+ } break;
+ default:
+ if (!Context::isUnevaluatedBuiltin(BuiltinID)) {
+ // Put arguments on the stack.
+ for (const auto *Arg : E->arguments()) {
+ if (!this->visit(Arg))
+ return false;
+ }
+ }
}
if (!this->emitCallBI(E, BuiltinID, E))
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h
index 901934e530ade..20571df0432f9 100644
--- a/clang/lib/AST/ByteCode/Compiler.h
+++ b/clang/lib/AST/ByteCode/Compiler.h
@@ -282,6 +282,7 @@ class Compiler : public ConstStmtVisitor<Compiler<Emitter>, bool>,
/// been created. visitInitializer() then relies on a pointer to this
/// variable being on top of the stack.
bool visitInitializer(const Expr *E);
+ bool visitAsLValue(const Expr *E);
/// Evaluates an expression for side effects and discards the result.
bool discard(const Expr *E);
/// Just pass evaluation on to \p E. This leaves all the parsing flags
@@ -426,6 +427,7 @@ class Compiler : public ConstStmtVisitor<Compiler<Emitter>, bool>,
bool DiscardResult = false;
bool InStmtExpr = false;
+ bool ToLValue = false;
/// Flag inidicating if we're initializing an already created
/// variable. This is set in visitInitializer().
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index b602b9731a6e3..554eb366d14f5 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2170,8 +2170,8 @@ static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
return true;
}
-static unsigned computeFullDescSize(const ASTContext &ASTCtx,
- const Descriptor *Desc) {
+static std::optional<unsigned> computeFullDescSize(const ASTContext &ASTCtx,
+ const Descriptor *Desc) {
if (Desc->isPrimitive())
return ASTCtx.getTypeSizeInChars(Desc->getType()).getQuantity();
@@ -2183,8 +2183,7 @@ static unsigned computeFullDescSize(const ASTContext &ASTCtx,
if (Desc->isRecord())
return ASTCtx.getTypeSizeInChars(Desc->getType()).getQuantity();
- llvm_unreachable("Unhandled descriptor type");
- return 0;
+ return std::nullopt;
}
static unsigned computePointerOffset(const ASTContext &ASTCtx,
@@ -2192,7 +2191,7 @@ static unsigned computePointerOffset(const ASTContext &ASTCtx,
unsigned Result = 0;
Pointer P = Ptr;
- while (P.isArrayElement() || P.isField()) {
+ while (P.isField() || P.isArrayElement()) {
P = P.expand();
const Descriptor *D = P.getFieldDesc();
@@ -2205,7 +2204,6 @@ static unsigned computePointerOffset(const ASTContext &ASTCtx,
Result += ElemSize * P.getIndex();
P = P.expand().getArray();
} else if (P.isBaseClass()) {
-
const auto *RD = cast<CXXRecordDecl>(D->asDecl());
bool IsVirtual = Ptr.isVirtualBaseClass();
P = P.getBase();
@@ -2234,30 +2232,111 @@ static unsigned computePointerOffset(const ASTContext &ASTCtx,
return Result;
}
+static bool pointsToLastObject(const Pointer &Ptr) {
+ Pointer P = Ptr;
+ while (!P.isRoot()) {
+
+ if (P.isArrayElement()) {
+ P = P.expand().getArray();
+ continue;
+ } else if (P.isBaseClass()) {
+ P = P.getBase();
+ continue;
+ }
+
+ Pointer Base = P.getBase();
+ if (const Record *R = Base.getRecord()) {
+ assert(P.getField());
+ if (P.getField()->getFieldIndex() != R->getNumFields() - 1)
+ return false;
+ }
+ P = Base;
+ }
+
+ return true;
+}
+
+/// Does Ptr point to the last object AND to a flexible array member?
+static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const Pointer &Ptr) {
+ auto isFlexibleArrayMember = [&](const Descriptor *FieldDesc) {
+ using FAMKind = LangOptions::StrictFlexArraysLevelKind;
+ FAMKind StrictFlexArraysLevel =
+ Ctx.getLangOpts().getStrictFlexArraysLevel();
+
+ if (StrictFlexArraysLevel == FAMKind::Default)
+ return true;
+
+ unsigned NumElems = FieldDesc->getNumElems();
+ if (NumElems == 0 && StrictFlexArraysLevel != FAMKind::IncompleteOnly)
+ return true;
+
+ if (NumElems == 1 && StrictFlexArraysLevel == FAMKind::OneZeroOrIncomplete)
+ return true;
+ return false;
+ };
+
+ const Descriptor *FieldDesc = Ptr.getFieldDesc();
+ if (!FieldDesc->isArray())
+ return false;
+
+ return Ptr.isDummy() && pointsToLastObject(Ptr) &&
+ isFlexibleArrayMember(FieldDesc);
+}
+
static bool interp__builtin_object_size(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call) {
PrimType KindT = *S.getContext().classify(Call->getArg(1));
+ // From the GCC docs:
+ // Kind is an integer constant from 0 to 3. If the least significant bit is
+ // clear, objects are whole variables. If it is set, a closest surrounding
+ // subobject is considered the object a pointer points to. The second bit
+ // determines if maximum or minimum of remaining bytes is computed.
[[maybe_unused]] unsigned Kind = popToAPSInt(S.Stk, KindT).getZExtValue();
assert(Kind <= 3 && "unexpected kind");
-
+ bool UseFieldDesc = (Kind & 1u);
+ bool ReportMinimum = (Kind & 2u);
const Pointer &Ptr = S.Stk.pop<Pointer>();
+ const ASTContext &ASTCtx = S.getASTContext();
- if (Ptr.isZero())
+ if (Ptr.isZero() || !Ptr.isBlockPointer())
return false;
- const Descriptor *DeclDesc = Ptr.getDeclDesc();
- if (!DeclDesc)
+ // We can't load through pointers.
+ if (Ptr.isDummy() && Ptr.getType()->isPointerType())
return false;
- const ASTContext &ASTCtx = S.getASTContext();
+ bool DetermineForCompleteObject = Ptr.getFieldDesc() == Ptr.getDeclDesc();
- unsigned ByteOffset = computePointerOffset(ASTCtx, Ptr);
- unsigned FullSize = computeFullDescSize(ASTCtx, DeclDesc);
+ if (ReportMinimum) {
+ if (!DetermineForCompleteObject && !UseFieldDecl && Ptr.isDummy())
+ return false;
+ } else {
+ if (!DetermineForCompleteObject &&
+ isUserWritingOffTheEnd(ASTCtx, Ptr.expand()))
+ return false;
+ }
- pushInteger(S, FullSize - ByteOffset, Call->getType());
+ const Descriptor *Desc =
+ UseFieldDesc ? Ptr.getFieldDesc() : Ptr.getDeclDesc();
+ assert(Desc);
+ std::optional<unsigned> FullSize = computeFullDescSize(ASTCtx, Desc);
+ if (!FullSize)
+ return false;
+
+ unsigned ByteOffset;
+ if (UseFieldDesc && !Ptr.isBaseClass())
+ ByteOffset = computePointerOffset(ASTCtx, Ptr) -
+ computePointerOffset(ASTCtx, Ptr.expand().atIndex(0).narrow());
+ else
+ ByteOffset = computePointerOffset(ASTCtx, Ptr);
+
+ assert(ByteOffset <= *FullSize);
+ unsigned Result = *FullSize - ByteOffset;
+
+ pushInteger(S, Result, Call->getType());
return true;
}
diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp
index 2843b325fe025..749ae2510612c 100644
--- a/clang/lib/AST/ByteCode/Program.cpp
+++ b/clang/lib/AST/ByteCode/Program.cpp
@@ -164,8 +164,8 @@ unsigned Program::getOrCreateDummy(const DeclTy &D) {
const auto *VD = cast<ValueDecl>(cast<const Decl *>(D));
IsWeak = VD->isWeak();
QT = VD->getType();
- if (const auto *RT = QT->getAs<ReferenceType>())
- QT = RT->getPointeeType();
+ if (QT->isPointerOrReferenceType())
+ QT = QT->getPointeeType();
}
assert(!QT.isNull());
diff --git a/clang/test/AST/ByteCode/builtin-object-size-codegen.cpp b/clang/test/AST/ByteCode/builtin-object-size-codegen.cpp
new file mode 100644
index 0000000000000..4e0e087245d72
--- /dev/null
+++ b/clang/test/AST/ByteCode/builtin-object-size-codegen.cpp
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -triple x86_64-apple-darwin -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm -o - %s | FileCheck %s
+
+void foo() {
+ struct A { char buf[16]; };
+ struct B : A {};
+ struct C { int i; B bs[1]; } *c;
+
+ int gi;
+ // CHECK: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ gi = __builtin_object_size(&c->bs[0], 0);
+ // CHECK: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false)
+ gi = __builtin_object_size(&c->bs[0], 1);
+ // CHECK: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false)
+ gi = __builtin_object_size(&c->bs[0], 2);
+ // CHECK: store i32 16
+ gi = __builtin_object_size(&c->bs[0], 3);
+}
+
+
+void foo2() {
+ struct A { int a; };
+ struct B { int b; };
+ struct C: public A, public B {};
+
+ C c;
+
+ int gi;
+ // CHECK: store i32 8
+ gi = __builtin_object_size(&c, 0);
+ // CHECK: store i32 8
+ gi = __builtin_object_size((A*)&c, 0);
+ // CHECK: store i32 4
+ gi = __builtin_object_size((B*)&c, 0);
+
+ // CHECK: store i32 8
+ gi = __builtin_object_size((char*)&c, 0);
+ // CHECK: store i32 8
+ gi = __builtin_object_size((char*)(A*)&c, 0);
+ // CHECK: store i32 4
+ gi = __builtin_object_size((char*)(B*)&c, 0);
+}
+
+
+typedef struct {
+ double c[0];
+ float f;
+} foofoo0_t;
+
+unsigned babar0(foofoo0_t *f) {
+ // CHECK: ret i32 0
+ return __builtin_object_size(f->c, 1);
+}
diff --git a/clang/test/Sema/builtin-object-size.c b/clang/test/Sema/builtin-object-size.c
index 20d4e2ab6da79..a763c24fd6620 100644
--- a/clang/test/Sema/builtin-object-size.c
+++ b/clang/test/Sema/builtin-object-size.c
@@ -2,6 +2,10 @@
// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-darwin9 -verify %s
// RUN: %clang_cc1 -DDYNAMIC -fsyntax-only -triple x86_64-apple-darwin9 -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-darwin9 -verify %s -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -DDYNAMIC -fsyntax-only -triple x86_64-apple-darwin9 -verify %s -fexperimental-new-constant-interpreter
+
#ifndef DYNAMIC
#define OBJECT_SIZE_BUILTIN __builtin_object_size
#else
|
ac4dfd3 to
c746da2
Compare
shafik
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will come back to this
| bool NewInitializing, bool NewToLValue) | ||
| : Ctx(Ctx), OldDiscardResult(Ctx->DiscardResult), | ||
| OldInitializing(Ctx->Initializing) { | ||
| OldInitializing(Ctx->Initializing), OldToLValue(NewToLValue) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should you be using Ctx->ToLValue to initialize OldToLValue?
|
|
||
| Pointer P = Ptr; | ||
| while (P.isArrayElement() || P.isField()) { | ||
| while (P.isField() || P.isArrayElement()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess you switched the order b/d isField is way more likely to be true and short circuit? If so that deserve a comment for future folks to keep in mind.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, the previous order just caused some assertion failures in isArrayElement IIRC. This version works for more cases.
| // Kind is an integer constant from 0 to 3. If the least significant bit is | ||
| // clear, objects are whole variables. If it is set, a closest surrounding | ||
| // subobject is considered the object a pointer points to. The second bit | ||
| // determines if maximum or minimum of remaining bytes is computed. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It unhelpfully does not tell you whether set of unset means maximum. I am guessing you had to derive that empirically. It would be nice if the documentation was more explicit.
| if (Ptr.isZero()) | ||
| if (Call->getArg(0)->HasSideEffects(ASTCtx)) { | ||
| // "If there are any side effects in them, it returns (size_t) -1 | ||
| // for type 0 or 1 and (size_t) 0 for type 2 or 3." |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe instead of calling Kind you could call it Type b/c you edited the gcc documentation previously but failed to do so here. I think being more consistent w/ the documentation would help in future.
Initialize the `OldToLValue` member with the actual old value of `ToLValue`. Pointed out by Shafik in llvm#153601 (comment)
Initialize the `OldToLValue` member with the actual old value of `ToLValue`. Pointed out by Shafik in #153601 (comment)
Initialize the `OldToLValue` member with the actual old value of `ToLValue`. Pointed out by Shafik in llvm/llvm-project#153601 (comment)
No description provided.