[X86][GlobalIsel] Support IS_FP_CLASS intrinsic 1/4 #148801
Conversation
@llvm/pr-subscribers-backend-x86

Author: Mahesh-Attarde (mahesh-attarde)

Changes:

This patch implements the is_fp_class intrinsic for GlobalISel. This is patch 1/4.

Patch is 22.49 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/148801.diff (4 files affected).
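As background (not part of this patch): for a scalar f32, the legalization ultimately reduces llvm.is.fpclass to integer compares on the value's bit pattern. Below is a minimal, illustrative C++ sketch of those bit tests, using the same constants that appear in the generated assembly further down (0x7FFFFFFF, 0x7F800000, 0x7FC00000). The helper names are assumptions for illustration only, not LLVM API; the patch itself emits the equivalent generic MIR (G_AND/G_ICMP) via MachineIRBuilder instead.

```cpp
#include <cstdint>
#include <cstring>

// Reinterpret a float's bits as an unsigned integer.
static uint32_t bitsOf(float X) {
  uint32_t B;
  std::memcpy(&B, &X, sizeof(B));
  return B;
}

// inf: exponent all ones, mantissa zero -> abs(bits) == 0x7F800000.
static bool isInfBits(float X) {
  return (bitsOf(X) & 0x7FFFFFFFu) == 0x7F800000u;
}

// nan: exponent all ones, mantissa nonzero -> abs(bits) > 0x7F800000.
static bool isNanBits(float X) {
  return (bitsOf(X) & 0x7FFFFFFFu) > 0x7F800000u;
}

// qnan: quiet bit (0x00400000) set -> abs(bits) >= 0x7FC00000.
static bool isQuietNanBits(float X) {
  return (bitsOf(X) & 0x7FFFFFFFu) >= 0x7FC00000u;
}

// snan: a NaN whose quiet bit is clear.
static bool isSignalingNanBits(float X) {
  uint32_t Abs = bitsOf(X) & 0x7FFFFFFFu;
  return Abs >= 0x7F800001u && Abs < 0x7FC00000u;
}

// finite: exponent not all ones -> abs(bits) < 0x7F800000.
static bool isFiniteBits(float X) {
  return (bitsOf(X) & 0x7FFFFFFFu) < 0x7F800000u;
}
```

The full diff follows.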
diff --git a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
index 936c9fbb2fff0..226119384140e 100644
--- a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
+++ b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
@@ -80,6 +80,8 @@ const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
return APFloat::IEEEsingle();
case 64:
return APFloat::IEEEdouble();
+ case 80:
+ return APFloat::x87DoubleExtended();
case 128:
return APFloat::IEEEquad();
}
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 7fe58539cd4ec..642104e447aac 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -13,6 +13,7 @@
#include "X86LegalizerInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -579,6 +580,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
getActionDefinitionsBuilder({G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
.lower();
+ getActionDefinitionsBuilder(G_IS_FPCLASS).custom();
// fp intrinsics
getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN)
.scalarize(0)
@@ -616,6 +618,8 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
return legalizeFPTOSI(MI, MRI, Helper);
case TargetOpcode::G_GET_ROUNDING:
return legalizeGETROUNDING(MI, MRI, Helper);
+ case TargetOpcode::G_IS_FPCLASS:
+ return legalizeIsFPClass(MI, MRI, Helper);
}
llvm_unreachable("expected switch to return");
}
@@ -853,10 +857,236 @@ bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(DstTy, RetVal);
MIRBuilder.buildCopy(Dst, RetValTrunc);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool X86LegalizerInfo::expandFPClassTestForF32OrF64(
+ MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ FPClassTest Test = static_cast<FPClassTest>(MI.getOperand(2).getImm());
+ assert(!SrcTy.isVector() && "G_IS_FPCLASS does not support vectors yet");
+ const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
+
+ // Some checks may be represented as inversion of simpler check, for example
+ // "inf|normal|subnormal|zero" => !"nan".
+ bool IsInverted = false;
+
+ if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
+ Test = InvertedCheck;
+ IsInverted = true;
+ }
+
+ // In the general case use integer operations.
+ unsigned BitSize = SrcTy.getScalarSizeInBits();
+ LLT IntVT = LLT::scalar(BitSize);
+ // MachineInstrBuilder OpAsInt = MIRBuilder.buildBitcast(IntVT, SrcReg);
+ MachineInstrBuilder OpAsInt = MIRBuilder.buildCopy(IntVT, SrcReg);
+
+ // Various Mask
+ APInt SignMask = APInt::getSignMask(BitSize);
+ APInt ValueMask = APInt::getSignedMaxValue(BitSize);
+ APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt();
+ APInt InfPlus1 = Inf + 1;
+ APInt ExpMask = Inf;
+ APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
+ APInt QNaNBitMask =
+ APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
+ APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
+
+ auto ValueMaskV = MIRBuilder.buildConstant(IntVT, ValueMask);
+ auto SignBitV = MIRBuilder.buildConstant(IntVT, SignMask);
+ auto ExpMaskV = MIRBuilder.buildConstant(IntVT, ExpMask);
+ auto ZeroV = MIRBuilder.buildConstant(IntVT, 0);
+ auto InfV = MIRBuilder.buildConstant(IntVT, Inf);
+ auto InfPlus1V = MIRBuilder.buildConstant(IntVT, InfPlus1);
+ auto ResultInvertedV = MIRBuilder.buildConstant(DstTy, InvertionMask);
+
+ MachineInstrBuilder Res;
+ const auto appendResult = [&](MachineInstrBuilder &PartialRes) {
+ if (PartialRes.getInstr()) {
+ if (Res.getInstr()) {
+ Res = MIRBuilder.buildOr(DstTy, Res, PartialRes);
+ } else {
+ Res = PartialRes;
+ }
+ }
+ };
+ // Split the value into sign bit and absolute value.
+ auto AbsV = MIRBuilder.buildAnd(IntVT, OpAsInt, ValueMaskV);
+ auto SignVDestReg = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ auto SignV =
+ MIRBuilder.buildICmp(CmpInst::ICMP_SLT, SignVDestReg, OpAsInt, ZeroV);
+
+ // Tests that involve more than one class should be processed first.
+ MachineInstrBuilder PartialRes;
+
+ if ((Test & fcFinite) == fcFinite) {
+ // finite(V) ==> abs(V) < exp_mask
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_SGE : CmpInst::ICMP_SLT,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), AbsV, ExpMaskV);
+ Test &= ~fcFinite;
+ } else if ((Test & fcFinite) == fcPosFinite) {
+ // finite(V) && V > 0 ==> V < exp_mask
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), OpAsInt, ExpMaskV);
+ Test &= ~fcPosFinite;
+ } else if ((Test & fcFinite) == fcNegFinite) {
+ // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
+ auto PartialResPart = MIRBuilder.buildICmp(
+ CmpInst::ICMP_SLT, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ AbsV, ExpMaskV);
+ PartialRes = MIRBuilder.buildAnd(LLT::scalar(1), PartialResPart, SignV);
+ Test &= ~fcNegFinite;
+ }
+ appendResult(PartialRes);
+
+ if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
+ // fcZero | fcSubnormal => test all exponent bits are 0
+ // TODO: Handle sign bit specific cases
+ if (PartialCheck == (fcZero | fcSubnormal)) {
+ auto ExpBits = MIRBuilder.buildAnd(IntVT, OpAsInt, ExpMaskV);
+ auto ExpIsZero = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ ExpBits, ZeroV);
+ appendResult(ExpIsZero);
+ Test &= ~PartialCheck & fcAllFlags;
+ }
+ }
+
+ // Check for individual classes.
+ if (unsigned PartialCheck = Test & fcZero) {
+ if (PartialCheck == fcPosZero)
+ PartialRes = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ OpAsInt, ZeroV);
+ else if (PartialCheck == fcZero)
+ PartialRes = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ AbsV, ZeroV);
+ else // ISD::fcNegZero
+ PartialRes = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ OpAsInt, SignBitV);
+ appendResult(PartialRes);
+ }
+ if (unsigned PartialCheck = Test & fcSubnormal) {
+    assert(false && "Not supported yet!");
+ }
+ if (unsigned PartialCheck = Test & fcInf) {
+ if (PartialCheck == fcPosInf)
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), OpAsInt, InfV);
+ else if (PartialCheck == fcInf)
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), AbsV, InfV);
+ else { // ISD::fcNegInf
+ APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
+ auto NegInfV = MIRBuilder.buildConstant(IntVT, NegInf);
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), OpAsInt, NegInfV);
+ }
+ MIRBuilder.buildCopy(DstReg, PartialRes);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (unsigned PartialCheck = Test & fcNan) {
+ APInt InfWithQnanBit = Inf | QNaNBitMask;
+ auto InfWithQnanBitV = MIRBuilder.buildConstant(IntVT, InfWithQnanBit);
+ if (PartialCheck == fcNan) {
+ // isnan(V) ==> abs(V) > int(inf)
+ auto AbsDstReg = MRI.createGenericVirtualRegister(LLT::scalar(BitSize));
+ auto FAbsV = MIRBuilder.buildCopy(AbsDstReg, SrcReg);
+ auto InfVDstReg = MRI.createGenericVirtualRegister(LLT::scalar(BitSize));
+ PartialRes = MIRBuilder.buildFCmp(
+ CmpInst::FCMP_UEQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ FAbsV, FAbsV);
+ } else if (PartialCheck == fcQNan) {
+ // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
+ PartialRes = MIRBuilder.buildICmp(
+ IsInverted ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE,
+ MRI.createGenericVirtualRegister(LLT::scalar(1)), AbsV,
+ InfWithQnanBitV);
+
+ } else { // ISD::fcSNan
+ // issignaling(V) ==> abs(V) > unsigned(Inf) &&
+ // abs(V) < (unsigned(Inf) | quiet_bit)
+ auto IsNotQnan = MIRBuilder.buildICmp(
+ CmpInst::ICMP_SLT, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ AbsV, InfWithQnanBitV);
+ auto IsNan = MIRBuilder.buildICmp(
+ CmpInst::ICMP_SGE, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ AbsV, InfPlus1V);
+ PartialRes = MIRBuilder.buildAnd(LLT::scalar(1), IsNan, IsNotQnan);
+ }
+ MIRBuilder.buildCopy(DstReg, PartialRes);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (unsigned PartialCheck = Test & fcNormal) {
+    assert(false && "Not supported yet!");
+ }
+ if (unsigned PartialCheck = Test & fcSubnormal) {
+ // subnormal(V) ==> abs(V) < exp_mask && signbit == 0
+ auto ExpBits = MIRBuilder.buildAnd(IntVT, OpAsInt, ExpMaskV);
+ auto ExpIsZero = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ ExpBits, ZeroV);
+ auto SignBit = MIRBuilder.buildICmp(
+ CmpInst::ICMP_EQ, MRI.createGenericVirtualRegister(LLT::scalar(1)),
+ SignV, ZeroV);
+ PartialRes = MIRBuilder.buildAnd(LLT::scalar(1), ExpIsZero, SignBit);
+ appendResult(PartialRes);
+ }
+ if (!Res.getInstr()) {
+ Res = MIRBuilder.buildConstant(LLT::scalar(1), IsInverted);
+ MIRBuilder.buildCopy(DstReg, Res);
+ MI.eraseFromParent();
+ return true;
+ }
+ MIRBuilder.buildCopy(DstReg, Res);
MI.eraseFromParent();
return true;
}
+bool X86LegalizerInfo::expandFPClassTestForF80(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ return false;
+}
+
+bool X86LegalizerInfo::legalizeIsFPClass(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ assert(!SrcTy.isVector() && "G_IS_FPCLASS does not support vectors yet");
+
+ FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
+ if (Mask == fcNone) {
+ MIRBuilder.buildConstant(DstReg, 0);
+ MI.eraseFromParent();
+ return true;
+ }
+ if (Mask == fcAllFlags) {
+ MIRBuilder.buildConstant(DstReg, 1);
+ MI.eraseFromParent();
+ return true;
+ }
+ bool IsF80 = (SrcTy == LLT::scalar(80));
+  // For f32/f64/f80, some checks could be implemented with float comparisons
+  // when floating-point exceptions are ignored (NoFPExcept).
+
+ if (IsF80)
+ return expandFPClassTestForF80(MI, MRI, Helper);
+  return expandFPClassTestForF32OrF64(MI, MRI, Helper);
+}
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 0003552d70ee0..107dd1c8af605 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -57,6 +57,12 @@ class X86LegalizerInfo : public LegalizerInfo {
bool legalizeGETROUNDING(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
+ bool expandFPClassTestForF32OrF64(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
+ bool expandFPClassTestForF80(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
+ bool legalizeIsFPClass(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
};
} // namespace llvm
#endif
diff --git a/llvm/test/CodeGen/X86/fpclass.ll b/llvm/test/CodeGen/X86/fpclass.ll
new file mode 100644
index 0000000000000..80c3d579871aa
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fpclass.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64
+; RUN: llc < %s -mtriple=i686-linux -global-isel | FileCheck %s -check-prefixes=X86,X86-GISEL
+; RUN: llc < %s -mtriple=x86_64-linux -global-isel | FileCheck %s -check-prefixes=X64,X64-GISEL
+
+; FIXME: We can reuse llvm/test/CodeGen/X86/is_fpclass.ll when all patches are included.
+
+define i1 @is_fcNone_f32(float %x) nounwind {
+; X86-LABEL: is_fcNone_f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: is_fcNone_f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0)
+ ret i1 %0
+}
+
+define i1 @is_fcAllFlags_f32(float %x) nounwind {
+; X86-LABEL: is_fcAllFlags_f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb $1, %al
+; X86-NEXT: retl
+;
+; X64-LABEL: is_fcAllFlags_f32:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023)
+ ret i1 %0
+}
+
+define i1 @issignaling_f(float %x) {
+; X64-LABEL: issignaling_f:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setl %cl
+; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-NEXT: setge %al
+; X64-NEXT: andb %cl, %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: issignaling_f:
+; X86-GISEL: # %bb.0:
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setl %cl
+; X86-GISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-GISEL-NEXT: setge %al
+; X86-GISEL-NEXT: andb %cl, %al
+; X86-GISEL-NEXT: retl
+ %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan"
+ ret i1 %a0
+}
+
+ define i1 @isquiet_f(float %x) {
+; X64-LABEL: isquiet_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setge %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: isquiet_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setge %al
+; X86-GISEL-NEXT: retl
+ entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan"
+ ret i1 %0
+}
+
+define i1 @not_isquiet_f(float %x) {
+; X64-LABEL: not_isquiet_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setl %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: not_isquiet_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setl %al
+; X86-GISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan"
+ ret i1 %0
+}
+
+define i1 @isinf_f(float %x) {
+; X64-LABEL: isinf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: isinf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: sete %al
+; X86-GISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
+ ret i1 %0
+}
+
+define i1 @not_isinf_f(float %x) {
+; X64-LABEL: not_isinf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: not_isinf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: setne %al
+; X86-GISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf"
+ ret i1 %0
+}
+
+define i1 @is_plus_inf_f(float %x) {
+; X86-LABEL: is_plus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: is_plus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf"
+ ret i1 %0
+}
+
+define i1 @is_minus_inf_f(float %x) {
+; X86-LABEL: is_minus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: is_minus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf"
+ ret i1 %0
+}
+
+define i1 @not_is_minus_inf_f(float %x) {
+; X86-LABEL: not_is_minus_inf_f:
+; X86: # %bb.0: # %entry
+; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-LABEL: not_is_minus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf"
+ ret i1 %0
+}
+
+define i1 @isfinite_f(float %x) {
+; X64-LABEL: isfinite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setl %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: isfinite_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: setl %al
+; X86-GISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
+ ret i1 %0
+}
+
+define i1 @not_isfinite_f(float %x) {
+; X64-LABEL: not_isfinite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setge %al
+; X64-NEXT: retq
+;
+; X86-GISEL-LABEL: no...
[truncated]
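For readers decoding the immediates used in the tests above: the second operand of llvm.is.fpclass is a bitmask over the ten floating-point classes. The following sketch lists mask values consistent with the comments in the tests (e.g. 0x204 = "inf", 0x1f8 = "finite", 1023 = all classes); the enumerator names are illustrative and may not match LLVM's FPClassTest spelling exactly.

```cpp
// Illustrative class-mask values; composites are ORs of the single-class bits.
enum FPClassMask : unsigned {
  MaskSNan         = 0x001, // signaling NaN
  MaskQNan         = 0x002, // quiet NaN
  MaskNegInf       = 0x004,
  MaskNegNormal    = 0x008,
  MaskNegSubnormal = 0x010,
  MaskNegZero      = 0x020,
  MaskPosZero      = 0x040,
  MaskPosSubnormal = 0x080,
  MaskPosNormal    = 0x100,
  MaskPosInf       = 0x200,

  MaskNan    = MaskSNan | MaskQNan,          // 0x003
  MaskInf    = MaskNegInf | MaskPosInf,      // 0x204 (516 in the tests)
  MaskZero   = MaskNegZero | MaskPosZero,    // 0x060
  MaskFinite = 0x3FF & ~(MaskNan | MaskInf), // 0x1F8 (504 in the tests)
  MaskAll    = 0x3FF                         // 1023 in the tests
};
```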
You can test this locally with the following command:

git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- llvm/lib/CodeGen/LowLevelTypeUtils.cpp llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp llvm/lib/Target/X86/GISel/X86LegalizerInfo.h

View the diff from clang-format here:

diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 344ff71bd..9229ddf61 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -1086,7 +1086,7 @@ bool X86LegalizerInfo::legalizeIsFPClass(MachineInstr &MI,
if (IsF80)
return expandFPClassTestForF80(MI, MRI, Helper);
- return expandFPClassTestForF32OrF64(MI, MRI, Helper);
+ return expandFPClassTestForF32OrF64(MI, MRI, Helper);
}
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
llvm/test/CodeGen/X86/fpclass.ll
Outdated
@@ -0,0 +1,323 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
rename to isel-fpclass.ll
Something went wrong here. Closing.
This patch implements the is_fp_class intrinsic for GlobalISel. This is patch 1/4.
The test fpclass.ll will be added in a new PR as usual; it is included here as a representative example.