diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1c889d67c81e0..d36e38884e3a2 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1758,10 +1758,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, for (auto Opcode : {ISD::FCEIL, ISD::FDIV, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT, - ISD::FROUND, ISD::FROUNDEVEN, ISD::FSQRT, ISD::FTRUNC}) { + ISD::FROUND, ISD::FROUNDEVEN, ISD::FSQRT, ISD::FTRUNC, ISD::SETCC}) { setOperationPromotedToType(Opcode, MVT::nxv2bf16, MVT::nxv2f32); setOperationPromotedToType(Opcode, MVT::nxv4bf16, MVT::nxv4f32); - setOperationAction(Opcode, MVT::nxv8bf16, Expand); + setOperationPromotedToType(Opcode, MVT::nxv8bf16, MVT::nxv8f32); } if (!Subtarget->hasSVEB16B16()) { @@ -1769,7 +1769,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, ISD::FMINIMUM, ISD::FMINNUM, ISD::FMUL, ISD::FSUB}) { setOperationPromotedToType(Opcode, MVT::nxv2bf16, MVT::nxv2f32); setOperationPromotedToType(Opcode, MVT::nxv4bf16, MVT::nxv4f32); - setOperationAction(Opcode, MVT::nxv8bf16, Expand); + setOperationPromotedToType(Opcode, MVT::nxv8bf16, MVT::nxv8f32); } } diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-compares.ll b/llvm/test/CodeGen/AArch64/sve-bf16-compares.ll new file mode 100644 index 0000000000000..e0bf755c851b7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-bf16-compares.ll @@ -0,0 +1,1001 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub +; RUN: llc -mattr=+sve < %s | FileCheck %s +; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; +; OEQ +; + +define @fcmp_oeq_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_oeq_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp oeq %a, %b + ret %res +} + +define @fcmp_oeq_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_oeq_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp oeq %a, %b + ret %res +} + +define @fcmp_oeq_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_oeq_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmeq p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp oeq %a, %b + ret %res +} + +; +; OGT +; + +define @fcmp_ogt_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_ogt_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp ogt %a, %b + ret %res +} + +define @fcmp_ogt_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_ogt_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp ogt %a, %b + ret %res +} + +define @fcmp_ogt_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_ogt_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp ogt %a, %b + ret %res +} + +; +; OGE +; + +define @fcmp_oge_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_oge_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp oge %a, %b + ret %res +} + +define @fcmp_oge_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_oge_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp oge %a, %b + ret %res +} + +define @fcmp_oge_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_oge_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp oge %a, %b + ret %res +} + +; +; OLT +; + +define @fcmp_olt_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_olt_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: ret + %res = fcmp olt %a, %b + ret %res +} + +define @fcmp_olt_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_olt_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: ret + %res = fcmp olt %a, %b + ret %res +} + +define @fcmp_olt_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_olt_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: uunpkhi z3.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp olt %a, %b + ret %res +} + +; +; OLE +; + +define @fcmp_ole_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_ole_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: ret + %res = fcmp ole %a, %b + ret %res +} + +define @fcmp_ole_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_ole_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: ret + %res = fcmp ole %a, %b + ret %res +} + +define @fcmp_ole_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_ole_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: uunpkhi z3.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp ole %a, %b + ret %res +} + +; +; ONE +; + +define @fcmp_one_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_one_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b +; CHECK-NEXT: ret + %res = fcmp one %a, %b + ret %res +} + +define @fcmp_one_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_one_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b +; CHECK-NEXT: ret + %res = fcmp one %a, %b + ret %res +} + +define @fcmp_one_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_one_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: uunpkhi z3.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z2.s, z3.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z0.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: mov p1.b, p2/m, p2.b +; CHECK-NEXT: sel p0.b, p0, p0.b, p3.b +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp one %a, %b + ret %res +} + +; +; ORD +; + +define @fcmp_ord_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_ord_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %res = fcmp ord %a, %b + ret %res +} + +define @fcmp_ord_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_ord_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %res = fcmp ord %a, %b + ret %res +} + +define @fcmp_ord_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_ord_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmuo p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmuo p2.s, p0/z, z0.s, z1.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p0.b, p0/z, p2.b +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp ord %a, %b + ret %res +} + +; +; UEQ +; + +define @fcmp_ueq_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_ueq_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b +; CHECK-NEXT: ret + %res = fcmp ueq %a, %b + ret %res +} + +define @fcmp_ueq_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_ueq_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b +; CHECK-NEXT: ret + %res = fcmp ueq %a, %b + ret %res +} + +define @fcmp_ueq_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_ueq_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmuo p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmeq p2.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmuo p3.s, p0/z, z0.s, z1.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: mov p1.b, p2/m, p2.b +; CHECK-NEXT: sel p0.b, p0, p0.b, p3.b +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp ueq %a, %b + ret %res +} + +; +; UGT +; + +define @fcmp_ugt_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_ugt_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z0.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %res = fcmp ugt %a, %b + ret %res +} + +define @fcmp_ugt_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_ugt_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z0.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %res = fcmp ugt %a, %b + ret %res +} + +define @fcmp_ugt_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_ugt_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: uunpkhi z3.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, z0.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p0.b, p0/z, p2.b +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp ugt %a, %b + ret %res +} + +; +; UGE +; + +define @fcmp_uge_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_uge_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %res = fcmp uge %a, %b + ret %res +} + +define @fcmp_uge_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_uge_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %res = fcmp uge %a, %b + ret %res +} + +define @fcmp_uge_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_uge_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: uunpkhi z3.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z0.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p0.b, p0/z, p2.b +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp uge %a, %b + ret %res +} + +; +; ULT +; + +define @fcmp_ult_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_ult_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %res = fcmp ult %a, %b + ret %res +} + +define @fcmp_ult_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_ult_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %res = fcmp ult %a, %b + ret %res +} + +define @fcmp_ult_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_ult_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z1.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p0.b, p0/z, p2.b +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp ult %a, %b + ret %res +} + +; +; ULE +; + +define @fcmp_ule_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_ule_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %res = fcmp ule %a, %b + ret %res +} + +define @fcmp_ule_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_ule_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: not p0.b, p0/z, p1.b +; CHECK-NEXT: ret + %res = fcmp ule %a, %b + ret %res +} + +define @fcmp_ule_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_ule_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p0.b, p0/z, p2.b +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp ule %a, %b + ret %res +} + +; +; UNE +; + +define @fcmp_une_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_une_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp une %a, %b + ret %res +} + +define @fcmp_une_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_une_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp une %a, %b + ret %res +} + +define @fcmp_une_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_une_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmne p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp une %a, %b + ret %res +} + +; +; UNO +; + +define @fcmp_uno_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_uno_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp uno %a, %b + ret %res +} + +define @fcmp_uno_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_uno_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp uno %a, %b + ret %res +} + +define @fcmp_uno_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_uno_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmuo p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp uno %a, %b + ret %res +} + +; +; EQ +; + +define @fcmp_eq_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_eq_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp fast oeq %a, %b + ret %res +} + +define @fcmp_eq_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_eq_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp fast oeq %a, %b + ret %res +} + +define @fcmp_eq_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_eq_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmeq p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp fast oeq %a, %b + ret %res +} + +; +; GT +; + +define @fcmp_gt_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_gt_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp fast ogt %a, %b + ret %res +} + +define @fcmp_gt_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_gt_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp fast ogt %a, %b + ret %res +} + +define @fcmp_gt_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_gt_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp fast ogt %a, %b + ret %res +} + +; +; GE +; + +define @fcmp_ge_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_ge_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp fast oge %a, %b + ret %res +} + +define @fcmp_ge_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_ge_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp fast oge %a, %b + ret %res +} + +define @fcmp_ge_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_ge_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp fast oge %a, %b + ret %res +} + +; +; LT +; + +define @fcmp_lt_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_lt_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: ret + %res = fcmp fast olt %a, %b + ret %res +} + +define @fcmp_lt_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_lt_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: ret + %res = fcmp fast olt %a, %b + ret %res +} + +define @fcmp_lt_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_lt_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: uunpkhi z3.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: fcmgt p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp fast olt %a, %b + ret %res +} + +; +; LE +; + +define @fcmp_le_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_le_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: ret + %res = fcmp fast ole %a, %b + ret %res +} + +define @fcmp_le_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_le_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: ret + %res = fcmp fast ole %a, %b + ret %res +} + +define @fcmp_le_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_le_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: uunpkhi z3.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp fast ole %a, %b + ret %res +} + +; +; NE +; + +define @fcmp_ne_nxv2bf16( %a, %b) { +; CHECK-LABEL: fcmp_ne_nxv2bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp fast one %a, %b + ret %res +} + +define @fcmp_ne_nxv4bf16( %a, %b) { +; CHECK-LABEL: fcmp_ne_nxv4bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: ret + %res = fcmp fast one %a, %b + ret %res +} + +define @fcmp_ne_nxv8bf16( %a, %b) { +; CHECK-LABEL: fcmp_ne_nxv8bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsl z2.s, z2.s, #16 +; CHECK-NEXT: lsl z3.s, z3.s, #16 +; CHECK-NEXT: lsl z1.s, z1.s, #16 +; CHECK-NEXT: lsl z0.s, z0.s, #16 +; CHECK-NEXT: fcmne p1.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret + %res = fcmp fast one %a, %b + ret %res +}