[X86][GlobalIsel] Support IS_FP_CLASS intrinsic 1/4 #148801
Conversation
@llvm/pr-subscribers-backend-x86

Author: Mahesh-Attarde (mahesh-attarde)

Changes:

This patch implements the is_fp_class intrinsic for GlobalISel. This is patch 1/4.

Patch is 22.49 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/148801.diff (4 files affected).
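As background (not part of this patch): for a scalar f32, the legalization ultimately reduces llvm.is.fpclass to integer compares on the value's bit pattern. Below is a minimal, illustrative C++ sketch of those bit tests, using the same constants that appear in the generated assembly further down (0x7FFFFFFF, 0x7F800000, 0x7FC00000). The helper names are assumptions for illustration only, not LLVM API; the patch itself emits the equivalent generic MIR (G_AND/G_ICMP) via MachineIRBuilder instead.

```cpp
#include <cstdint>
#include <cstring>

// Reinterpret a float's bits as an unsigned integer.
static uint32_t bitsOf(float X) {
  uint32_t B;
  std::memcpy(&B, &X, sizeof(B));
  return B;
}

// inf: exponent all ones, mantissa zero -> abs(bits) == 0x7F800000.
static bool isInfBits(float X) {
  return (bitsOf(X) & 0x7FFFFFFFu) == 0x7F800000u;
}

// nan: exponent all ones, mantissa nonzero -> abs(bits) > 0x7F800000.
static bool isNanBits(float X) {
  return (bitsOf(X) & 0x7FFFFFFFu) > 0x7F800000u;
}

// qnan: quiet bit (0x00400000) set -> abs(bits) >= 0x7FC00000.
static bool isQuietNanBits(float X) {
  return (bitsOf(X) & 0x7FFFFFFFu) >= 0x7FC00000u;
}

// snan: a NaN whose quiet bit is clear.
static bool isSignalingNanBits(float X) {
  uint32_t Abs = bitsOf(X) & 0x7FFFFFFFu;
  return Abs >= 0x7F800001u && Abs < 0x7FC00000u;
}

// finite: exponent not all ones -> abs(bits) < 0x7F800000.
static bool isFiniteBits(float X) {
  return (bitsOf(X) & 0x7FFFFFFFu) < 0x7F800000u;
}
```

The full diff follows.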
diff --git a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
index 936c9fbb2fff0..226119384140e 100644
--- a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
+++ b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
@@ -80,6 +80,8 @@ const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
return APFloat::IEEEsingle();
case 64:
return APFloat::IEEEdouble();
+ case 80:
+ return APFloat::x87DoubleExtended();
case 128:
return APFloat::IEEEquad();
}
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 7fe58539cd4ec..642104e447aac 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -13,6 +13,7 @@
#include "X86LegalizerInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -579,6 +580,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
.lower();
+ getActionDefinitionsBuilder(G_IS_FPCLASS).custom();
// fp intrinsics
getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN)
.scalarize(0)
@@ -616,6 +618,8 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
return legalizeFPTOSI(MI, MRI, Helper);
case TargetOpcode::G_GET_ROUNDING:
return legalizeGETROUNDING(MI, MRI, Helper);
+ case TargetOpcode::G_IS_FPCLASS:
+ return legalizeIsFPClass(MI, MRI, Helper);
}
llvm_unreachable("expected switch to return");
}
@@ -853,10 +857,236 @@ bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(DstTy, RetVal);
MIRBuilder.buildCopy(Dst, RetValTrunc);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool X86LegalizerInfo::expandFPClassTestForF32OrF64(
+ MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ FPClassTest Test = static_cast<FPClassTest>(MI.getOperand(2).getImm());
+ assert(!SrcTy.isVector() && "G_IS_FPCLASS does not support vectors yet");
+ const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
+
+ // Some checks may be represented as inversion of simpler check, for example
+ // "inf|normal|subnormal|zero" => !"nan".
+ bool IsInverted = false;
+
+ if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
+ Test = InvertedCheck;
+ IsInverted = true;
+ }
+
+ // In the general case use integer operations.
+ unsigned BitSize = SrcTy.getScalarSizeInBits();
+ LLT IntVT = LLT::scalar(BitSize);
+ // MachineInstrBuilder OpAsInt = MIRBuilder.buildBitcast(IntVT, SrcReg);
+ MachineInstrBuilder OpAsInt = MIRBuilder.buildCopy(IntVT, SrcReg);
+
+ // Various Mask
+ APInt SignMask = APInt::getSignMask(BitSize);
+ APInt ValueMask = APInt::getSignedMaxValue(BitSize);
+ APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt();
+ APInt InfPlus1 = Inf + 1;
+ APInt ExpMask = Inf;
+ APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
+ APInt QNaNBitMask =
+ APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
+ APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
+
+ auto ValueMaskV = MIRBuilder.buildConstant(IntVT, ValueMask);
+ auto SignBitV = MIRBuilder.buildConstant(IntVT, SignMask);
+ auto ExpMaskV = MIRBuilder.buildConstant(IntVT, ExpMask);
+ auto ZeroV = MIRBuilder.buildConstant(IntVT, 0);
+ auto InfV = MIRBuilder.buildConstant(IntVT, Inf);
+ auto InfPlus1V = MIRBuilder.buildConstant(IntVT, InfPlus1);
+ auto ResultInvertedV = MIRBuilder.buildConstant(DstTy, InvertionMask);
+
+ MachineInstrBuilder Res;
+ const auto appendResult = [&](MachineInstrBuilder &PartialRes) {
+ if (PartialRes.getInstr()) {
+ if (Res.getInstr()) {
+ Res = MIRBuilder.buildOr(DstTy, Res, PartialRes);
+ } else {
+ Res = PartialRes;
+ }
+ }
+ };
+ // Split the value into sign bit and absolute value.
+ auto AbsV = MIRBuilder.buildAnd(IntVT, OpAsInt, ValueMaskV);
+ auto SignVDestReg = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ auto SignV =
+ MIRBuilder.buildICmp(CmpInst::ICMP_SLT, SignVDestReg, OpAsInt, ZeroV);
+
+ // Tests that involve more than one class should be processed first.
+ MachineInstrBuilder PartialRes;
+
+ if ((Test & fcFinite) == fcFinite) {
+ // finite(V) ==> abs(V) < exp_mask
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_SGE : CmpInst::ICMP_SLT,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), AbsV, ExpMaskV);
+ Test &= ~fcFinite;
+ } else if ((Test & fcFinite) == fcPosFinite) {
+ // finite(V) && V > 0 ==> V < exp_mask
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), OpAsInt, ExpMaskV);
+ Test &= ~fcPosFinite;
+ } else if ((Test & fcFinite) == fcNegFinite) {
+ // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
+ auto PartialResPart = MIRBuilder.buildICmp(
+ CmpInst::ICMP_SLT, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ AbsV, ExpMaskV);
+ PartialRes = MIRBuilder.buildAnd(LLT::scalar(1), PartialResPart, SignV);
+ Test &= ~fcNegFinite;
+ }
+ appendResult(PartialRes);
+
+ if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
+ // fcZero | fcSubnormal => test all exponent bits are 0
+ // TODO: Handle sign bit specific cases
+ if (PartialCheck == (fcZero | fcSubnormal)) {
+ auto ExpBits = MIRBuilder.buildAnd(IntVT, OpAsInt, ExpMaskV);
+ auto ExpIsZero = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ ExpBits, ZeroV);
+ appendResult(ExpIsZero);
+ Test &= ~PartialCheck & fcAllFlags;
+ }
+ }
+
+ // Check for individual classes.
+ if (unsigned PartialCheck = Test & fcZero) {
+ if (PartialCheck == fcPosZero)
+ PartialRes = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ OpAsInt, ZeroV);
+ else if (PartialCheck == fcZero)
+ PartialRes = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ AbsV, ZeroV);
+ else // ISD::fcNegZero
+ PartialRes = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ OpAsInt, SignBitV);
+ appendResult(PartialRes);
+ }
+ if (unsigned PartialCheck = Test & fcSubnormal) {
+    assert(false && "Not supported yet!");
+ }
+ if (unsigned PartialCheck = Test & fcInf) {
+ if (PartialCheck == fcPosInf)
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), OpAsInt, InfV);
+ else if (PartialCheck == fcInf)
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), AbsV, InfV);
+ else { // ISD::fcNegInf
+ APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
+ auto NegInfV = MIRBuilder.buildConstant(IntVT, NegInf);
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), OpAsInt, NegInfV);
+ }
+ MIRBuilder.buildCopy(DstReg, PartialRes);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (unsigned PartialCheck = Test & fcNan) {
+ APInt InfWithQnanBit = Inf | QNaNBitMask;
+ auto InfWithQnanBitV = MIRBuilder.buildConstant(IntVT, InfWithQnanBit);
+ if (PartialCheck == fcNan) {
+ // isnan(V) ==> abs(V) > int(inf)
+ auto AbsDstReg = MRI.createGenericVirtualRegister(LLT::scalar(BitSize));
+ auto FAbsV = MIRBuilder.buildCopy(AbsDstReg, SrcReg);
+ auto InfVDstReg = MRI.createGenericVirtualRegister(LLT::scalar(BitSize));
+ PartialRes = MIRBuilder.buildFCmp(
+ CmpInst::FCMP_UEQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ FAbsV, FAbsV);
+ } else if (PartialCheck == fcQNan) {
+ // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), AbsV,
+ InfWithQnanBitV);
+
+ } else { // ISD::fcSNan
+ // issignaling(V) ==> abs(V) > unsigned(Inf) &&
+ // abs(V) < (unsigned(Inf) | quiet_bit)
+ auto IsNotQnan = MIRBuilder.buildICmp(
+ CmpInst::ICMP_SLT, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ AbsV, InfWithQnanBitV);
+ auto IsNan = MIRBuilder.buildICmp(
+ CmpInst::ICMP_SGE, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ AbsV, InfPlus1V);
+ PartialRes = MIRBuilder.buildAnd(LLT::scalar(1), IsNan, IsNotQnan);
+ }
+ MIRBuilder.buildCopy(DstReg, PartialRes);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (unsigned PartialCheck = Test & fcNormal) {
+    assert(false && "Not supported yet!");
+ }
+ if (unsigned PartialCheck = Test & fcSubnormal) {
+ // subnormal(V) ==> abs(V) < exp_mask && signbit == 0
+ auto ExpBits = MIRBuilder.buildAnd(IntVT, OpAsInt, ExpMaskV);
+ auto ExpIsZero = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ ExpBits, ZeroV);
+ auto SignBit = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ SignV, ZeroV);
+ PartialRes = MIRBuilder.buildAnd(LLT::scalar(1), ExpIsZero, SignBit);
+ appendResult(PartialRes);
+ }
+ if (!Res.getInstr()) {
+ Res = MIRBuilder.buildConstant(LLT::scalar(1), IsInverted);
+ MIRBuilder.buildCopy(DstReg, Res);
+ MI.eraseFromParent();
+ return true;
+ }
+ MIRBuilder.buildCopy(DstReg, Res);
MI.eraseFromParent();
return true;
}
+bool X86LegalizerInfo::expandFPClassTestForF80(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ return false;
+}
+
+bool X86LegalizerInfo::legalizeIsFPClass(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ assert(!SrcTy.isVector() && "G_IS_FPCLASS does not support vectors yet");
+
+ FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
+ if (Mask == fcNone) {
+ MIRBuilder.buildConstant(DstReg, 0);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (Mask == fcAllFlags) {
+ MIRBuilder.buildConstant(DstReg, 1);
+ MI.eraseFromParent();
+ return true;
+ }
+ bool IsF80 = (SrcTy == LLT::scalar(80));
+  // For f32/f64/f80, some checks could be implemented with float comparisons
+  // when floating-point exceptions are ignored (NoFPExcept).
+
+ if (IsF80)
+ return expandFPClassTestForF80(MI, MRI, Helper);
+  return expandFPClassTestForF32OrF64(MI, MRI, Helper);
+}
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 0003552d70ee0..107dd1c8af605 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -57,6 +57,12 @@ class X86LegalizerInfo : public LegalizerInfo {
bool legalizeGETROUNDING(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
+ bool expandFPClassTestForF32OrF64(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
+ bool expandFPClassTestForF80(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
+ bool legalizeIsFPClass(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
};
} // namespace llvm
#endif
diff --git a/llvm/test/CodeGen/X86/fpclass.ll b/llvm/test/CodeGen/X86/fpclass.ll
new file mode 100644
index 0000000000000..80c3d579871aa
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fpclass.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64
+; RUN: llc < %s -mtriple=i686-linux -global-isel | FileCheck %s -check-prefixes=X86,X86-GISEL
+; RUN: llc < %s -mtriple=x86_64-linux -global-isel | FileCheck %s -check-prefixes=X64,X64-GISEL
+
+; FIXME: We can reuse llvm/test/CodeGen/X86/is_fpclass.ll when all patches are included.
+
+define i1 @is_fcNone_f32(float %x) nounwind {
+; X86-LABEL: is_fcNone_f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: is_fcNone_f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0)
+ ret i1 %0
+}
+
+define i1 @is_fcAllFlags_f32(float %x) nounwind {
+; X86-LABEL: is_fcAllFlags_f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: is_fcAllFlags_f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023)
+ ret i1 %0
+}
+
+define i1 @issignaling_f(float %x) {
+; X64-LABEL: issignaling_f:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setl %cl
+; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-NEXT: setge %al
+; X64-NEXT: andb %cl, %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: issignaling_f:
+; X86-GISEL: # %bb.0:
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setl %cl
+; X86-GISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-GISEL-NEXT: setge %al
+; X86-GISEL-NEXT: andb %cl, %al
+; X86-GISEL-NEXT: retl
+ %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan"
+ ret i1 %a0
+}
+
+ define i1 @isquiet_f(float %x) {
+; X64-LABEL: isquiet_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setge %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: isquiet_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setge %al
+; X86-GISEL-NEXT: retl
+ entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan"
+ ret i1 %0
+}
+
+define i1 @not_isquiet_f(float %x) {
+; X64-LABEL: not_isquiet_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setl %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: not_isquiet_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setl %al
+; X86-GISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan"
+ ret i1 %0
+}
+
+define i1 @isinf_f(float %x) {
+; X64-LABEL: isinf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: isinf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: sete %al
+; X86-GISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
+ ret i1 %0
+}
+
+define i1 @not_isinf_f(float %x) {
+; X64-LABEL: not_isinf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: not_isinf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: setne %al
+; X86-GISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf"
+ ret i1 %0
+}
+
+define i1 @is_plus_inf_f(float %x) {
+; X86-LABEL: is_plus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: is_plus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf"
+ ret i1 %0
+}
+
+define i1 @is_minus_inf_f(float %x) {
+; X86-LABEL: is_minus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: is_minus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf"
+ ret i1 %0
+}
+
+define i1 @not_is_minus_inf_f(float %x) {
+; X86-LABEL: not_is_minus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-LABEL: not_is_minus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf"
+ ret i1 %0
+}
+
+define i1 @isfinite_f(float %x) {
+; X64-LABEL: isfinite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setl %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: isfinite_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: setl %al
+; X86-GISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
+ ret i1 %0
+}
+
+define i1 @not_isfinite_f(float %x) {
+; X64-LABEL: not_isfinite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setge %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: no...
[truncated]
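For readers decoding the immediates used in the tests above: the second operand of llvm.is.fpclass is a bitmask over the ten floating-point classes. The following sketch lists mask values consistent with the comments in the tests (e.g. 0x204 = "inf", 0x1f8 = "finite", 1023 = all classes); the enumerator names are illustrative and may not match LLVM's FPClassTest spelling exactly.

```cpp
// Illustrative class-mask values; composites are ORs of the single-class bits.
enum FPClassMask : unsigned {
  MaskSNan         = 0x001, // signaling NaN
  MaskQNan         = 0x002, // quiet NaN
  MaskNegInf       = 0x004,
  MaskNegNormal    = 0x008,
  MaskNegSubnormal = 0x010,
  MaskNegZero      = 0x020,
  MaskPosZero      = 0x040,
  MaskPosSubnormal = 0x080,
  MaskPosNormal    = 0x100,
  MaskPosInf       = 0x200,

  MaskNan    = MaskSNan | MaskQNan,          // 0x003
  MaskInf    = MaskNegInf | MaskPosInf,      // 0x204 (516 in the tests)
  MaskZero   = MaskNegZero | MaskPosZero,    // 0x060
  MaskFinite = 0x3FF & ~(MaskNan | MaskInf), // 0x1F8 (504 in the tests)
  MaskAll    = 0x3FF                         // 1023 in the tests
};
```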
You can test this locally with the following command:

git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- llvm/lib/CodeGen/LowLevelTypeUtils.cpp llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp llvm/lib/Target/X86/GISel/X86LegalizerInfo.h

View the diff from clang-format here:

diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 344ff71bd..9229ddf61 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -1086,7 +1086,7 @@ bool X86LegalizerInfo::legalizeIsFPClass(MachineInstr &MI,
if (IsF80)
return expandFPClassTestForF80(MI, MRI, Helper);
- return expandFPClassTestForF32OrF64(MI, MRI, Helper);
+ return expandFPClassTestForF32OrF64(MI, MRI, Helper);
}
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
llvm/test/CodeGen/X86/fpclass.ll
Outdated
@@ -0,0 +1,323 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
rename to isel-fpclass.ll
Something went wrong here. Closing.
This patch implements the is_fp_class intrinsic for GlobalISel. This is patch 1/4.
The test fpclass.ll will be added in a new PR as usual; it is included here as a representative example.