[CIR] Upstream X86 builtin clflush, fence and pause #167401

HendrikHuebner · 2025-11-10T22:10:00Z

This PR upstreams the intrinsics _mm_prefetch, _mm_(l|m)fenche, _mm_pause and _mm_clflush from the incubator repository.

llvmbot · 2025-11-10T22:10:36Z

@llvm/pr-subscribers-clangir

Author: Hendrik Hübner (HendrikHuebner)

Changes

This PR upstreams the intrinsics _mm_prefetch and _mm_clflush from the incubator repository.

Full diff: https://github.com/llvm/llvm-project/pull/167401.diff

7 Files Affected:

(modified) clang/include/clang/CIR/Dialect/IR/CIROps.td (+45)
(modified) clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp (+60)
(modified) clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp (+22)
(modified) clang/lib/CIR/CodeGen/CIRGenFunction.h (+4)
(modified) clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp (+14)
(added) clang/test/CIR/CodeGen/X86/sse-builtins.c (+23)
(added) clang/test/CIR/CodeGen/X86/sse2-builtins.c (+23)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 16258513239d9..902b1fa64fb5b 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -413,6 +413,18 @@ def CIR_ConstantOp : CIR_Op<"const", [
 
     template <typename T>
     T getValueAttr() { return mlir::dyn_cast<T>(getValue()); }
+
+    llvm::APInt getIntValue() {
+      if (const auto intAttr = getValueAttr<cir::IntAttr>())
+        return intAttr.getValue();
+      llvm_unreachable("Expected an IntAttr in ConstantOp");
+    }
+
+    bool getBoolValue() {
+      if (const auto boolAttr = getValueAttr<cir::BoolAttr>())
+        return boolAttr.getValue();
+      llvm_unreachable("Expected a BoolAttr in ConstantOp");
+    }
   }];
 
   let hasFolder = 1;
@@ -2579,6 +2591,39 @@ def CIR_FuncOp : CIR_Op<"func", [
   }];
 }
 
+//===----------------------------------------------------------------------===//
+// LLVMIntrinsicCallOp
+//===----------------------------------------------------------------------===//
+
+def CIR_LLVMIntrinsicCallOp : CIR_Op<"llvm.intrinsic"> {
+  let summary = "Call to llvm intrinsic functions that is not defined in CIR";
+  let description = [{
+    `cir.llvm.intrinsic` operation represents a call-like expression which has
+    return type and arguments that maps directly to a llvm intrinsic.
+    It only records intrinsic `intrinsic_name`.
+  }];
+
+  let results = (outs Optional<CIR_AnyType>:$result);
+  let arguments = (ins
+                   StrAttr:$intrinsic_name, Variadic<CIR_AnyType>:$arg_ops);
+
+  let skipDefaultBuilders = 1;
+
+  let assemblyFormat = [{
+    $intrinsic_name $arg_ops `:` functional-type($arg_ops, $result) attr-dict
+  }];
+
+  let builders = [
+    OpBuilder<(ins "mlir::StringAttr":$intrinsic_name, "mlir::Type":$resType,
+              CArg<"mlir::ValueRange", "{}">:$operands), [{
+      $_state.addAttribute("intrinsic_name", intrinsic_name);
+      $_state.addOperands(operands);
+      if (resType)
+        $_state.addTypes(resType);
+    }]>,
+  ];
+}
+
 //===----------------------------------------------------------------------===//
 // CallOp
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 0198a9d4eb192..2f02dd0319cd0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -21,6 +21,49 @@
 using namespace clang;
 using namespace clang::CIRGen;
 
+/// Get integer from a mlir::Value that is an int constant or a constant op.
+static int64_t getIntValueFromConstOp(mlir::Value val) {
+  return val.getDefiningOp<cir::ConstantOp>().getIntValue().getSExtValue();
+}
+
+static mlir::Value emitClFlush(CIRGenFunction& cgf,
+                               const CallExpr* e,
+                               mlir::Value& op) {
+    mlir::Type voidTy = cir::VoidType::get(&cgf.getMLIRContext());
+    mlir::Location location = cgf.getLoc(e->getExprLoc());
+    return cgf.getBuilder()
+        .create<cir::LLVMIntrinsicCallOp>(
+            location, cgf.getBuilder().getStringAttr("x86.sse2.clflush"),
+            voidTy, op)
+        .getResult();
+}
+
+static mlir::Value emitPrefetch(CIRGenFunction& cgf,
+                                const CallExpr* e,
+                                mlir::Value& addr,
+                                int64_t hint) {
+  CIRGenBuilderTy& builder = cgf.getBuilder();
+  mlir::Type voidTy = cir::VoidType::get(&cgf.getMLIRContext());
+  mlir::Type sInt32Ty = cir::IntType::get(&cgf.getMLIRContext(), 32, true);
+  mlir::Value address = builder.createPtrBitcast(addr, voidTy);
+  mlir::Location location = cgf.getLoc(e->getExprLoc());
+  mlir::Value rw =
+      cir::ConstantOp::create(builder, location,
+                              cir::IntAttr::get(sInt32Ty, (hint >> 2) & 0x1));
+  mlir::Value locality =
+      cir::ConstantOp::create(builder, location,
+                              cir::IntAttr::get(sInt32Ty, hint & 0x3));
+  mlir::Value data = cir::ConstantOp::create(builder, location,
+                                             cir::IntAttr::get(sInt32Ty, 1));
+
+  return cir::LLVMIntrinsicCallOp::create(
+             builder, location,
+             builder.getStringAttr("prefetch"), voidTy,
+             mlir::ValueRange{address, rw, locality, data})
+      .getResult();
+}
+
+
 mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
                                                const CallExpr *e) {
   if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -43,11 +86,28 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   // Find out if any arguments are required to be integer constant expressions.
   assert(!cir::MissingFeatures::handleBuiltinICEArguments());
 
+  // The operands of the builtin call
+  llvm::SmallVector<mlir::Value, 4> ops;
+
+  // `ICEArguments` is a bitmap indicating whether the argument at the i-th bit
+  // is required to be a constant integer expression.
+  unsigned ICEArguments = 0;
+  ASTContext::GetBuiltinTypeError error;
+  getContext().GetBuiltinType(builtinID, error, &ICEArguments);
+  assert(error == ASTContext::GE_None && "Error while getting builtin type.");
+
+  const unsigned numArgs = e->getNumArgs();
+  for (unsigned i = 0; i != numArgs; i++) {
+    ops.push_back(emitScalarOrConstFoldImmArg(ICEArguments, i, e));
+  }
+
   switch (builtinID) {
   default:
     return {};
   case X86::BI_mm_prefetch:
+    return emitPrefetch(*this, e, ops[0], getIntValueFromConstOp(ops[1]));
   case X86::BI_mm_clflush:
+    return emitClFlush(*this, e, ops[0]);
   case X86::BI_mm_lfence:
   case X86::BI_mm_pause:
   case X86::BI_mm_mfence:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 5eba5ba6c3df1..236f487afd9ba 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -1430,6 +1430,28 @@ mlir::Value CIRGenFunction::emitPromotedScalarExpr(const Expr *e,
   return ScalarExprEmitter(*this, builder).Visit(const_cast<Expr *>(e));
 }
 
+mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg(unsigned ICEArguments,
+                                                        unsigned index,
+                                                        const CallExpr *e) {
+  mlir::Value arg{};
+
+  // The bit at the specified index indicates whether the argument is required
+  // to be a constant integer expression.
+  bool isArgRequiredToBeConstant = (ICEArguments & (1 << index));
+
+  if (!isArgRequiredToBeConstant) {
+    arg = emitScalarExpr(e->getArg(index));
+  } else {
+    // If this is required to be a constant, constant fold it so that we
+    // know that the generated intrinsic gets a ConstantInt.
+    std::optional<llvm::APSInt> result =
+        e->getArg(index)->getIntegerConstantExpr(getContext());
+    assert(result && "Expected argument to be a constant");
+    arg = builder.getConstInt(getLoc(e->getSourceRange()), *result);
+  }
+  return arg;
+}
+
 [[maybe_unused]] static bool mustVisitNullValue(const Expr *e) {
   // If a null pointer expression's type is the C++0x nullptr_t and
   // the expression is not a simple literal, it must be evaluated
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index f879e580989f7..0ce8714b23e82 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1529,6 +1529,10 @@ class CIRGenFunction : public CIRGenTypeCache {
   mlir::Value emitScalarExpr(const clang::Expr *e,
                              bool ignoreResultAssign = false);
 
+  mlir::Value emitScalarOrConstFoldImmArg(unsigned ICEArguments,
+                                          unsigned index,
+                                          const CallExpr *e);
+
   mlir::Value emitScalarPrePostIncDec(const UnaryOperator *e, LValue lv,
                                       cir::UnaryOpKind kind, bool isPre);
 
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index b4afed7019417..f4379b402fe13 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -320,6 +320,20 @@ static mlir::LLVM::CallIntrinsicOp replaceOpWithCallLLVMIntrinsicOp(
   return callIntrinOp;
 }
 
+mlir::LogicalResult CIRToLLVMLLVMIntrinsicCallOpLowering::matchAndRewrite(
+    cir::LLVMIntrinsicCallOp op,
+    OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type llvmResTy =
+      getTypeConverter()->convertType(op->getResultTypes()[0]);
+  if (!llvmResTy)
+    return op.emitError("expected LLVM result type");
+  StringRef name = op.getIntrinsicName();
+  replaceOpWithCallLLVMIntrinsicOp(rewriter, op, "llvm." + name, llvmResTy,
+                                   adaptor.getOperands());
+  return mlir::success();
+}
+
 /// IntAttr visitor.
 mlir::Value CIRAttrToValue::visitCirAttr(cir::IntAttr intAttr) {
   mlir::Location loc = parentOp->getLoc();
diff --git a/clang/test/CIR/CodeGen/X86/sse-builtins.c b/clang/test/CIR/CodeGen/X86/sse-builtins.c
new file mode 100644
index 0000000000000..4dd141168ba66
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/sse-builtins.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// This test mimics clang/test/CodeGen/X86/sse-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+
+void test_mm_prefetch(char const* p) {
+  // CIR-LABEL: test_mm_prefetch
+  // LLVM-LABEL: test_mm_prefetch
+  _mm_prefetch(p, 0);
+  // CIR: cir.prefetch read locality(0) %{{.*}} : !cir.ptr<!void>
+  // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1)
+}
diff --git a/clang/test/CIR/CodeGen/X86/sse2-builtins.c b/clang/test/CIR/CodeGen/X86/sse2-builtins.c
new file mode 100644
index 0000000000000..0c275fa089262
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/sse2-builtins.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s
+
+// This test mimics clang/test/CodeGen/X86/sse2-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+
+void test_mm_clflush(void* A) {
+  // CIR-LABEL: test_mm_clflush
+  // LLVM-LABEL: teh
+  _mm_clflush(A);
+  // CIR-CHECK: {{%.*}} = cir.llvm.intrinsic "x86.sse2.clflush" {{%.*}} : (!cir.ptr<!void>) -> !void
+  // LLVM-CHECK: call void @llvm.x86.sse2.clflush(ptr {{%.*}})
+}

llvmbot · 2025-11-10T22:10:36Z

@llvm/pr-subscribers-clang

Author: Hendrik Hübner (HendrikHuebner)

Changes

This PR upstreams the intrinsics _mm_prefetch and _mm_clflush from the incubator repository.

Full diff: https://github.com/llvm/llvm-project/pull/167401.diff

7 Files Affected:

(modified) clang/include/clang/CIR/Dialect/IR/CIROps.td (+45)
(modified) clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp (+60)
(modified) clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp (+22)
(modified) clang/lib/CIR/CodeGen/CIRGenFunction.h (+4)
(modified) clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp (+14)
(added) clang/test/CIR/CodeGen/X86/sse-builtins.c (+23)
(added) clang/test/CIR/CodeGen/X86/sse2-builtins.c (+23)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 16258513239d9..902b1fa64fb5b 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -413,6 +413,18 @@ def CIR_ConstantOp : CIR_Op<"const", [
 
     template <typename T>
     T getValueAttr() { return mlir::dyn_cast<T>(getValue()); }
+
+    llvm::APInt getIntValue() {
+      if (const auto intAttr = getValueAttr<cir::IntAttr>())
+        return intAttr.getValue();
+      llvm_unreachable("Expected an IntAttr in ConstantOp");
+    }
+
+    bool getBoolValue() {
+      if (const auto boolAttr = getValueAttr<cir::BoolAttr>())
+        return boolAttr.getValue();
+      llvm_unreachable("Expected a BoolAttr in ConstantOp");
+    }
   }];
 
   let hasFolder = 1;
@@ -2579,6 +2591,39 @@ def CIR_FuncOp : CIR_Op<"func", [
   }];
 }
 
+//===----------------------------------------------------------------------===//
+// LLVMIntrinsicCallOp
+//===----------------------------------------------------------------------===//
+
+def CIR_LLVMIntrinsicCallOp : CIR_Op<"llvm.intrinsic"> {
+  let summary = "Call to llvm intrinsic functions that is not defined in CIR";
+  let description = [{
+    `cir.llvm.intrinsic` operation represents a call-like expression which has
+    return type and arguments that maps directly to a llvm intrinsic.
+    It only records intrinsic `intrinsic_name`.
+  }];
+
+  let results = (outs Optional<CIR_AnyType>:$result);
+  let arguments = (ins
+                   StrAttr:$intrinsic_name, Variadic<CIR_AnyType>:$arg_ops);
+
+  let skipDefaultBuilders = 1;
+
+  let assemblyFormat = [{
+    $intrinsic_name $arg_ops `:` functional-type($arg_ops, $result) attr-dict
+  }];
+
+  let builders = [
+    OpBuilder<(ins "mlir::StringAttr":$intrinsic_name, "mlir::Type":$resType,
+              CArg<"mlir::ValueRange", "{}">:$operands), [{
+      $_state.addAttribute("intrinsic_name", intrinsic_name);
+      $_state.addOperands(operands);
+      if (resType)
+        $_state.addTypes(resType);
+    }]>,
+  ];
+}
+
 //===----------------------------------------------------------------------===//
 // CallOp
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 0198a9d4eb192..2f02dd0319cd0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -21,6 +21,49 @@
 using namespace clang;
 using namespace clang::CIRGen;
 
+/// Get integer from a mlir::Value that is an int constant or a constant op.
+static int64_t getIntValueFromConstOp(mlir::Value val) {
+  return val.getDefiningOp<cir::ConstantOp>().getIntValue().getSExtValue();
+}
+
+static mlir::Value emitClFlush(CIRGenFunction& cgf,
+                               const CallExpr* e,
+                               mlir::Value& op) {
+    mlir::Type voidTy = cir::VoidType::get(&cgf.getMLIRContext());
+    mlir::Location location = cgf.getLoc(e->getExprLoc());
+    return cgf.getBuilder()
+        .create<cir::LLVMIntrinsicCallOp>(
+            location, cgf.getBuilder().getStringAttr("x86.sse2.clflush"),
+            voidTy, op)
+        .getResult();
+}
+
+static mlir::Value emitPrefetch(CIRGenFunction& cgf,
+                                const CallExpr* e,
+                                mlir::Value& addr,
+                                int64_t hint) {
+  CIRGenBuilderTy& builder = cgf.getBuilder();
+  mlir::Type voidTy = cir::VoidType::get(&cgf.getMLIRContext());
+  mlir::Type sInt32Ty = cir::IntType::get(&cgf.getMLIRContext(), 32, true);
+  mlir::Value address = builder.createPtrBitcast(addr, voidTy);
+  mlir::Location location = cgf.getLoc(e->getExprLoc());
+  mlir::Value rw =
+      cir::ConstantOp::create(builder, location,
+                              cir::IntAttr::get(sInt32Ty, (hint >> 2) & 0x1));
+  mlir::Value locality =
+      cir::ConstantOp::create(builder, location,
+                              cir::IntAttr::get(sInt32Ty, hint & 0x3));
+  mlir::Value data = cir::ConstantOp::create(builder, location,
+                                             cir::IntAttr::get(sInt32Ty, 1));
+
+  return cir::LLVMIntrinsicCallOp::create(
+             builder, location,
+             builder.getStringAttr("prefetch"), voidTy,
+             mlir::ValueRange{address, rw, locality, data})
+      .getResult();
+}
+
+
 mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
                                                const CallExpr *e) {
   if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -43,11 +86,28 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   // Find out if any arguments are required to be integer constant expressions.
   assert(!cir::MissingFeatures::handleBuiltinICEArguments());
 
+  // The operands of the builtin call
+  llvm::SmallVector<mlir::Value, 4> ops;
+
+  // `ICEArguments` is a bitmap indicating whether the argument at the i-th bit
+  // is required to be a constant integer expression.
+  unsigned ICEArguments = 0;
+  ASTContext::GetBuiltinTypeError error;
+  getContext().GetBuiltinType(builtinID, error, &ICEArguments);
+  assert(error == ASTContext::GE_None && "Error while getting builtin type.");
+
+  const unsigned numArgs = e->getNumArgs();
+  for (unsigned i = 0; i != numArgs; i++) {
+    ops.push_back(emitScalarOrConstFoldImmArg(ICEArguments, i, e));
+  }
+
   switch (builtinID) {
   default:
     return {};
   case X86::BI_mm_prefetch:
+    return emitPrefetch(*this, e, ops[0], getIntValueFromConstOp(ops[1]));
   case X86::BI_mm_clflush:
+    return emitClFlush(*this, e, ops[0]);
   case X86::BI_mm_lfence:
   case X86::BI_mm_pause:
   case X86::BI_mm_mfence:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 5eba5ba6c3df1..236f487afd9ba 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -1430,6 +1430,28 @@ mlir::Value CIRGenFunction::emitPromotedScalarExpr(const Expr *e,
   return ScalarExprEmitter(*this, builder).Visit(const_cast<Expr *>(e));
 }
 
+mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg(unsigned ICEArguments,
+                                                        unsigned index,
+                                                        const CallExpr *e) {
+  mlir::Value arg{};
+
+  // The bit at the specified index indicates whether the argument is required
+  // to be a constant integer expression.
+  bool isArgRequiredToBeConstant = (ICEArguments & (1 << index));
+
+  if (!isArgRequiredToBeConstant) {
+    arg = emitScalarExpr(e->getArg(index));
+  } else {
+    // If this is required to be a constant, constant fold it so that we
+    // know that the generated intrinsic gets a ConstantInt.
+    std::optional<llvm::APSInt> result =
+        e->getArg(index)->getIntegerConstantExpr(getContext());
+    assert(result && "Expected argument to be a constant");
+    arg = builder.getConstInt(getLoc(e->getSourceRange()), *result);
+  }
+  return arg;
+}
+
 [[maybe_unused]] static bool mustVisitNullValue(const Expr *e) {
   // If a null pointer expression's type is the C++0x nullptr_t and
   // the expression is not a simple literal, it must be evaluated
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index f879e580989f7..0ce8714b23e82 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1529,6 +1529,10 @@ class CIRGenFunction : public CIRGenTypeCache {
   mlir::Value emitScalarExpr(const clang::Expr *e,
                              bool ignoreResultAssign = false);
 
+  mlir::Value emitScalarOrConstFoldImmArg(unsigned ICEArguments,
+                                          unsigned index,
+                                          const CallExpr *e);
+
   mlir::Value emitScalarPrePostIncDec(const UnaryOperator *e, LValue lv,
                                       cir::UnaryOpKind kind, bool isPre);
 
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index b4afed7019417..f4379b402fe13 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -320,6 +320,20 @@ static mlir::LLVM::CallIntrinsicOp replaceOpWithCallLLVMIntrinsicOp(
   return callIntrinOp;
 }
 
+mlir::LogicalResult CIRToLLVMLLVMIntrinsicCallOpLowering::matchAndRewrite(
+    cir::LLVMIntrinsicCallOp op,
+    OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::Type llvmResTy =
+      getTypeConverter()->convertType(op->getResultTypes()[0]);
+  if (!llvmResTy)
+    return op.emitError("expected LLVM result type");
+  StringRef name = op.getIntrinsicName();
+  replaceOpWithCallLLVMIntrinsicOp(rewriter, op, "llvm." + name, llvmResTy,
+                                   adaptor.getOperands());
+  return mlir::success();
+}
+
 /// IntAttr visitor.
 mlir::Value CIRAttrToValue::visitCirAttr(cir::IntAttr intAttr) {
   mlir::Location loc = parentOp->getLoc();
diff --git a/clang/test/CIR/CodeGen/X86/sse-builtins.c b/clang/test/CIR/CodeGen/X86/sse-builtins.c
new file mode 100644
index 0000000000000..4dd141168ba66
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/sse-builtins.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// This test mimics clang/test/CodeGen/X86/sse-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+
+void test_mm_prefetch(char const* p) {
+  // CIR-LABEL: test_mm_prefetch
+  // LLVM-LABEL: test_mm_prefetch
+  _mm_prefetch(p, 0);
+  // CIR: cir.prefetch read locality(0) %{{.*}} : !cir.ptr<!void>
+  // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1)
+}
diff --git a/clang/test/CIR/CodeGen/X86/sse2-builtins.c b/clang/test/CIR/CodeGen/X86/sse2-builtins.c
new file mode 100644
index 0000000000000..0c275fa089262
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/sse2-builtins.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK --input-file=%t.ll %s
+
+// This test mimics clang/test/CodeGen/X86/sse2-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+
+void test_mm_clflush(void* A) {
+  // CIR-LABEL: test_mm_clflush
+  // LLVM-LABEL: teh
+  _mm_clflush(A);
+  // CIR-CHECK: {{%.*}} = cir.llvm.intrinsic "x86.sse2.clflush" {{%.*}} : (!cir.ptr<!void>) -> !void
+  // LLVM-CHECK: call void @llvm.x86.sse2.clflush(ptr {{%.*}})
+}

github-actions · 2025-11-10T22:11:49Z

✅ With the latest revision this PR passed the C/C++ code formatter.

HendrikHuebner · 2025-11-10T22:11:50Z

This is my first contribution to ClangIR, upstreaming these seemed to be some good low hanging fruits to get started :)

Does the code have to be auto formatted? I'm not sure if the CI's suggestions are improvements in this case...

andykaylor · 2025-11-10T23:15:29Z

Hi @HendrikHuebner . Thanks for the PR!

Yes, the code should be formatted to pass the clang-format check run by CI. You can do that using this command:

git diff -U0 --no-color --relative HEAD^ | <path-to-clang-format>/clang-format-diff.py -p1 -i

When you build Clang, it also builds clang-format and clang-format-diff.py. I find it useful to save a pre-built copy to another directory.

clang/include/clang/CIR/Dialect/IR/CIROps.td

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

andykaylor · 2025-11-10T23:37:44Z

clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp

  return ScalarExprEmitter(*this, builder).Visit(const_cast<Expr *>(e));
 }

+mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg(unsigned ICEArguments,


This is also being added in #167125

See my comments there about using a range-for at the call sites and passing the argument expression directly to this function.

I'd prefer to have the other PR merged first, then rebase this one to use the definition of this function that will be added there.

Hi, you mentioned under another PR that you would like to merge this PR first after all. I addressed all the feedback and removed the prefetch builtin from this patch. Could you give it another look? Thanks.

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

andykaylor · 2025-11-10T23:52:10Z

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

  default:
    return {};
  case X86::BI_mm_prefetch:
+    return emitPrefetch(*this, e, ops[0], getIntValueFromConstOp(ops[1]));


I'm not sure you're actually hitting this case. The test you added looks like it would get here, but the check in the test is checking for cir.prefetch being generated rather than cir.llvm.intrinsic as the code in this PR would generate.

I just checked and the prefetch test passes without this PR being applied. This is because in xmmintrin.h the definition of _mm_prefetch looks like this:

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_prefetch(const void *__P, enum _mm_hint __I) { /* Current PowerPC will ignores the hint parameters. */ __builtin_prefetch(__P); }

It's calling the general prefetch builtin rather than the X86-specific builtin. That's actually what we want. I'm not sure this handler is needed at all.

Hi, thanks for the review. Should we still keep the handler? Clangs codegen also provides a handler for X86::BI_mm_prefetch and I think they test it the same way here

Oh, I see what's going on now. The version I showed above was from clang/lib/Headers/ppc_wrappers/xmmintrin.h and so isn't the definition we normally find. In the usual version (clang/lib/Headers/xmmintrin.h) I find this:

#ifndef _MSC_VER // If _MSC_VER is defined, we use the builtin variant of _mm_prefetch. // Otherwise, we provide this macro, which includes a cast, allowing the user // to pass a pointer of any time. The _mm_prefetch accepts char to match MSVC. /// Loads one cache line of data from the specified address to a location /// closer to the processor. /// /// \headerfile <x86intrin.h> /// /// \code /// void _mm_prefetch(const void *a, const int sel); /// \endcode /// /// This intrinsic corresponds to the <c> PREFETCHNTA </c> instruction. /// /// \param a /// A pointer to a memory location containing a cache line of data. /// \param sel /// A predefined integer constant specifying the type of prefetch /// operation: \n /// _MM_HINT_NTA: Move data using the non-temporal access (NTA) hint. The /// PREFETCHNTA instruction will be generated. \n /// _MM_HINT_T0: Move data using the T0 hint. The PREFETCHT0 instruction will /// be generated. \n /// _MM_HINT_T1: Move data using the T1 hint. The PREFETCHT1 instruction will /// be generated. \n /// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will /// be generated. #define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), \ ((sel) >> 2) & 1, (sel) & 0x3)) #endif

So, we hit the BI_mm_prefetch built-in if-and-only-if _MSC_VER is defined (otherwise, the macro above maps it to the general prefetch builtin).

Note that in the test you linked, several of the RUN lines contain -triple=x86_64-windows-msvc and -fms-compatibility, which will cause us to fall back on the BI_mm_prefetch builtin rather than mapping directly to builtin_prefetch.

I also noticed that there are _m_prefectch and _m_prefetchw builtins that were added here and are missing from the incubator.

We're missing a lot of support for Windows, but mostly ABI-related things so the builtin handling may work. However, we should be generating the cir.prefetch operation rather than a call to the LLVM intrinsic. I'd suggested moving this into a separate PR.

Co-authored-by: Andy Kaylor <[email protected]>

andykaylor

This looks good. It just needs a few updates to the tests.

andykaylor · 2025-11-12T22:39:39Z

clang/test/CIR/CodeGen/X86/sse-builtins.c

+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+


Can you also add the OGCG checks. It's useful to manually verify that we're generating equivalent code.

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG

andykaylor · 2025-11-12T22:42:20Z

clang/test/CIR/CodeGen/X86/sse2-builtins.c

@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s


Suggested change

// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s

// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s

Can you replace all instances of CIR-CHECK with CIR and LLVM-CHECK with LLVM? I'm not sure what the person who implemented that in the incubator was trying to accomplish, but it's not necessary. As currently written, this test will ignore the CIR-LABEL and LLVM-LABEL checks. Those are label checks for the CIR and LLVM prefixes.

Also, please add OGCG checks here too.

Done. What is the point of adding CIR-LABEL instead of just CIR? I've seen CIR-NEXT and CIR-SAME as well, are there docs about what these do?

Edit: Ahh I found this: https://llvm.org/docs/CommandGuide/FileCheck.html

@andykaylor Hi, you mentioned you would like to see this PR merged before the other builtin upstreaming PRs, do you think we can land it now?

andykaylor

lgtm

Do you need me to merge this for you?

HendrikHuebner · 2025-11-13T22:56:54Z

lgtm

Do you need me to merge this for you?

Yes, I don't think I have write permissions. Thanks!

Also, do you need me to sqaush the commits into one commit first, and should I give it a proper description?

[CIR] Upstream X86 builtin _mm_prefetch and _mm_clflush

539eebe

HendrikHuebner requested review from andykaylor, bcardosolopes, lanza and xlauko as code owners November 10, 2025 22:10

llvmbot added clang Clang issues not falling into any other category ClangIR Anything related to the ClangIR project labels Nov 10, 2025

fix warning

de47432

andykaylor reviewed Nov 10, 2025

View reviewed changes

HendrikHuebner and others added 4 commits November 11, 2025 11:56

Update clang/include/clang/CIR/Dialect/IR/CIROps.td

26d8914

Co-authored-by: Andy Kaylor <[email protected]>

feedback

93c0ded

formatting

9eb5199

formatting + refactor

776f5c1

HendrikHuebner changed the title ~~[CIR] Upstream X86 builtin _mm_prefetch and _mm_clflush~~ [CIR] Upstream X86 builtin _mm_prefetch, _mm_clflush, fence and pause builtin Nov 12, 2025

HendrikHuebner changed the title ~~[CIR] Upstream X86 builtin _mm_prefetch, _mm_clflush, fence and pause builtin~~ [CIR] Upstream X86 builtin _mm_prefetch, _mm_clflush, fence and pause Nov 12, 2025

HendrikHuebner mentioned this pull request Nov 12, 2025

[CIR] Upstream codegen for X86 SSE builtins #167584

Closed

Thibault-Monnier added a commit to Thibault-Monnier/llvm-project that referenced this pull request Nov 12, 2025

Resolve conflicts with PR llvm#167401 + code style

bfccdb1

Remove prefetch builtin and address feedback

8a07992

HendrikHuebner changed the title ~~[CIR] Upstream X86 builtin _mm_prefetch, _mm_clflush, fence and pause~~ [CIR] Upstream X86 builtin clflush, fence and pause Nov 12, 2025

formatting

e481360

Thibault-Monnier mentioned this pull request Nov 12, 2025

[CIR] Upstream handling of X86 builtins #167752

Open

andykaylor reviewed Nov 12, 2025

View reviewed changes

Improve tests

a665f70

HendrikHuebner requested a review from andykaylor November 13, 2025 19:05

andykaylor approved these changes Nov 13, 2025

View reviewed changes

andykaylor mentioned this pull request Nov 13, 2025

[CIR] Upstream CIR codegen for mxcsr x86 builtins #167948

Open

andykaylor merged commit 3ff3c4e into llvm:main Nov 13, 2025
10 checks passed

andykaylor mentioned this pull request Nov 14, 2025

[CIR] Upstream CIR codegen for vec_ext x86 builtins #167942

Merged

		@@ -0,0 +1,47 @@
		// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-cir -o %t.cir -Wall -Werror
		// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s

	// RUN: FileCheck --check-prefixes=CIR-CHECK --input-file=%t.cir %s
	// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s

[CIR] Upstream X86 builtin clflush, fence and pause #167401

[CIR] Upstream X86 builtin clflush, fence and pause #167401

Uh oh!

Conversation

HendrikHuebner commented Nov 10, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Nov 10, 2025

Uh oh!

llvmbot commented Nov 10, 2025

Uh oh!

github-actions bot commented Nov 10, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

HendrikHuebner commented Nov 10, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

andykaylor commented Nov 10, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

andykaylor left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

HendrikHuebner Nov 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

andykaylor left a comment

Choose a reason for hiding this comment

Uh oh!

HendrikHuebner commented Nov 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

HendrikHuebner commented Nov 10, 2025 •

edited

Loading

github-actions bot commented Nov 10, 2025 •

edited

Loading

HendrikHuebner commented Nov 10, 2025 •

edited

Loading

HendrikHuebner Nov 13, 2025 •

edited

Loading

HendrikHuebner commented Nov 13, 2025 •

edited

Loading