Skip to content

Commit 7357dc6

Browse files
author
liuzhenya
committed
[CIR] X86 vector fcmp-sse vector builtins
1 parent 851f8f7 commit 7357dc6

File tree

4 files changed

+286
-11
lines changed

4 files changed

+286
-11
lines changed

clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
131131
return cir::IntType::get(getContext(), n, false);
132132
}
133133

134+
static unsigned getCIRIntOrFloatBitWidth(mlir::Type eltTy) {
135+
if (auto intType = mlir::dyn_cast<cir::IntTypeInterface>(eltTy))
136+
return intType.getWidth();
137+
if (auto floatType = mlir::dyn_cast<cir::FPTypeInterface>(eltTy))
138+
return floatType.getWidth();
139+
140+
llvm_unreachable("Unsupported type in getCIRIntOrFloatBitWidth");
141+
}
134142
cir::IntType getSIntNTy(int n) {
135143
return cir::IntType::get(getContext(), n, true);
136144
}
@@ -584,6 +592,16 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
584592
return cir::CmpOp::create(*this, loc, getBoolTy(), kind, lhs, rhs);
585593
}
586594

595+
cir::VecCmpOp createVecCompare(mlir::Location loc, cir::CmpOpKind kind,
596+
mlir::Value lhs, mlir::Value rhs) {
597+
VectorType vecCast = mlir::cast<VectorType>(lhs.getType());
598+
IntType integralTy =
599+
getSIntNTy(getCIRIntOrFloatBitWidth(vecCast.getElementType()));
600+
VectorType integralVecTy =
601+
VectorType::get(context, integralTy, vecCast.getSize());
602+
return cir::VecCmpOp::create(*this, loc, integralVecTy, kind, lhs, rhs);
603+
}
604+
587605
mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand) {
588606
return createCompare(loc, cir::CmpOpKind::ne, operand, operand);
589607
}

clang/include/clang/CIR/MissingFeatures.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ struct MissingFeatures {
258258
static bool emitBranchThroughCleanup() { return false; }
259259
static bool emitCheckedInBoundsGEP() { return false; }
260260
static bool emitCondLikelihoodViaExpectIntrinsic() { return false; }
261+
static bool emitConstrainedFPCall() { return false; }
261262
static bool emitLifetimeMarkers() { return false; }
262263
static bool emitLValueAlignmentAssumption() { return false; }
263264
static bool emitNullCheckForDeleteCalls() { return false; }

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,53 @@ static mlir::Value emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *e,
3434
.getResult();
3535
}
3636

37+
// OG has unordered comparison as a form of optimization in addition to
38+
// ordered comparison, while CIR doesn't.
39+
//
40+
// This means that we can't encode the comparison code of UGT (unordered
41+
// greater than), at least not at the CIR level.
42+
//
43+
// The boolean shouldInvert compensates for this.
44+
// For example: to get to the comparison code UGT, we pass in
45+
// emitVectorFCmp (OLE, shouldInvert = true) since OLE is the inverse of UGT.
46+
47+
// There are several ways to support this otherwise:
48+
// - register extra CmpOpKind for unordered comparison types and build the
49+
// translation code
50+
// to go from CIR -> LLVM dialect. (Notice we get this naturally with
51+
// shouldInvert, benefiting from existing infrastructure, albeit having to
52+
// generate an extra `not` at CIR).
53+
// - Just add extra comparison code to a new VecCmpOpKind instead of
54+
// cluttering CmpOpKind.
55+
// - Add a boolean in VecCmpOp to indicate if it's doing unordered or ordered
56+
// comparison
57+
// - Just emit the intrinsics call instead of calling this helper, see how the
58+
// LLVM lowering handles this.
59+
static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
60+
llvm::SmallVector<mlir::Value> &ops,
61+
mlir::Location loc, cir::CmpOpKind pred,
62+
bool shouldInvert) {
63+
assert(!cir::MissingFeatures::cgFPOptionsRAII());
64+
// TODO(cir): Add isSignaling boolean once emitConstrainedFPCall implemented
65+
assert(!cir::MissingFeatures::emitConstrainedFPCall());
66+
mlir::Value cmp = builder.createVecCompare(loc, pred, ops[0], ops[1]);
67+
mlir::Value bitCast = builder.createBitcast(
68+
shouldInvert ? builder.createNot(cmp) : cmp, ops[0].getType());
69+
return bitCast;
70+
}
71+
3772
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
38-
const CallExpr *e) {
73+
const CallExpr *expr) {
3974
if (builtinID == Builtin::BI__builtin_cpu_is) {
40-
cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_is");
75+
cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_is");
4176
return {};
4277
}
4378
if (builtinID == Builtin::BI__builtin_cpu_supports) {
44-
cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_supports");
79+
cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_supports");
4580
return {};
4681
}
4782
if (builtinID == Builtin::BI__builtin_cpu_init) {
48-
cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_init");
83+
cgm.errorNYI(expr->getSourceRange(), "__builtin_cpu_init");
4984
return {};
5085
}
5186

@@ -66,7 +101,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
66101
getContext().GetBuiltinType(builtinID, error, &iceArguments);
67102
assert(error == ASTContext::GE_None && "Error while getting builtin type.");
68103

69-
for (auto [idx, arg] : llvm::enumerate(e->arguments())) {
104+
for (auto [idx, arg] : llvm::enumerate(expr->arguments())) {
70105
ops.push_back(emitScalarOrConstFoldImmArg(iceArguments, idx, arg));
71106
}
72107

@@ -77,15 +112,15 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
77112
default:
78113
return {};
79114
case X86::BI_mm_clflush:
80-
return emitIntrinsicCallOp(*this, e, "x86.sse2.clflush", voidTy, ops[0]);
115+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.clflush", voidTy, ops[0]);
81116
case X86::BI_mm_lfence:
82-
return emitIntrinsicCallOp(*this, e, "x86.sse2.lfence", voidTy);
117+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.lfence", voidTy);
83118
case X86::BI_mm_pause:
84-
return emitIntrinsicCallOp(*this, e, "x86.sse2.pause", voidTy);
119+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.pause", voidTy);
85120
case X86::BI_mm_mfence:
86-
return emitIntrinsicCallOp(*this, e, "x86.sse2.mfence", voidTy);
121+
return emitIntrinsicCallOp(*this, expr, "x86.sse2.mfence", voidTy);
87122
case X86::BI_mm_sfence:
88-
return emitIntrinsicCallOp(*this, e, "x86.sse.sfence", voidTy);
123+
return emitIntrinsicCallOp(*this, expr, "x86.sse.sfence", voidTy);
89124
case X86::BI_mm_prefetch:
90125
case X86::BI__rdtsc:
91126
case X86::BI__builtin_ia32_rdtscp:
@@ -741,10 +776,18 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
741776
case X86::BI__builtin_ia32_cmpunordpd:
742777
case X86::BI__builtin_ia32_cmpneqps:
743778
case X86::BI__builtin_ia32_cmpneqpd:
779+
cgm.errorNYI(expr->getSourceRange(),
780+
std::string("unimplemented X86 builtin call: ") +
781+
getContext().BuiltinInfo.getName(builtinID));
782+
return {};
744783
case X86::BI__builtin_ia32_cmpnltps:
745784
case X86::BI__builtin_ia32_cmpnltpd:
785+
return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
786+
cir::CmpOpKind::lt, /*shouldInvert=*/true);
746787
case X86::BI__builtin_ia32_cmpnleps:
747788
case X86::BI__builtin_ia32_cmpnlepd:
789+
return emitVectorFCmp(builder, ops, getLoc(expr->getExprLoc()),
790+
cir::CmpOpKind::le, /*shouldInvert=*/true);
748791
case X86::BI__builtin_ia32_cmpordps:
749792
case X86::BI__builtin_ia32_cmpordpd:
750793
case X86::BI__builtin_ia32_cmpph128_mask:
@@ -829,7 +872,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
829872
case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
830873
case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
831874
case X86::BI__builtin_ia32_prefetchi:
832-
cgm.errorNYI(e->getSourceRange(),
875+
cgm.errorNYI(expr->getSourceRange(),
833876
std::string("unimplemented X86 builtin call: ") +
834877
getContext().BuiltinInfo.getName(builtinID));
835878
return {};

0 commit comments

Comments
 (0)