From 27e82f1ab13c0a1761ea0c1c55f33c4014def119 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 30 Jul 2024 15:33:20 +0800
Subject: [PATCH 1/4] Precommit tests

---
 .../RISCV/rvv/rvv-peephole-vmerge-vops.ll     | 36 +++++++++++++++++++
 .../CodeGen/RISCV/rvv/vmv.v.v-peephole.ll     | 12 +++++++
 2 files changed, 48 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index a08bcae074b9b..ddf83d87cea6c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1196,3 +1196,39 @@ define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(
   )
   ret <vscale x 2 x i32> %b
 }
+
+define <vscale x 2 x i32> @unfoldable_mismatched_sew_mask(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
+; CHECK-LABEL: unfoldable_mismatched_sew_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    ret
+  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
+  %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
+    <vscale x 2 x i32> %passthru,
+    <vscale x 2 x i32> %passthru,
+    <vscale x 2 x i32> %a.bitcast,
+    <vscale x 2 x i1> %mask,
+    i64 %avl
+  )
+  ret <vscale x 2 x i32> %b
+}
+
+define <vscale x 2 x i32> @unfoldable_mismatched_sew_avl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
+; CHECK-LABEL: unfoldable_mismatched_sew_avl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e64, m1, tu, ma
+; CHECK-NEXT:    vadd.vv v8, v9, v10
+; CHECK-NEXT:    ret
+  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 5)
+  %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
+    <vscale x 2 x i32> %passthru,
+    <vscale x 2 x i32> %passthru,
+    <vscale x 2 x i32> %a.bitcast,
+    <vscale x 2 x i1> splat (i1 true),
+    i64 3
+  )
+  ret <vscale x 2 x i32> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index 8a589a31a1535..65ee91a0e1907 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -180,3 +180,15 @@ define <vscale x 2 x i32> @unfoldable_vredsum(<vscale x 2 x i32> %passthru,
   %b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, iXLen 1)
   ret <vscale x 2 x i32> %b
 }
+
+define <vscale x 2 x i32> @unfoldable_mismatched_sew_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
+; CHECK-LABEL: unfoldable_mismatched_sew_diff_vl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 3, e64, m1, tu, ma
+; CHECK-NEXT:    vadd.vv v8, v9, v10
+; CHECK-NEXT:    ret
+  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen 6)
+  %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a.bitcast, iXLen 3)
+  ret <vscale x 2 x i32> %b
+}

From fcbbfebef6affea547cc4201c9f4722148a03719 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 30 Jul 2024 16:35:28 +0800
Subject: [PATCH 2/4] [RISCV] Fix vmerge.vvm/vmv.v.v getting folded into ops
 with mismatching EEW

As noted in https://github.com/llvm/llvm-project/pull/100367/files#r1695448771,
we currently fold vmerge.vvms and vmv.v.vs into their ops even if the EEW is
different. This is incorrect if we end up changing the mask or AVL of the op.

This gets the op's EEW via its simple value type for now since there doesn't
seem to be any existing information about the EEW size of instructions.
We'll probably need to encode this at some point if we want to be able to
access it at the MachineInstr level in #100367
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp        | 10 +++++++++-
 .../CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll  | 12 ++++++++----
 llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll    |  6 ++++--
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 4418905ce21ed..fc0238e4892d6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3855,11 +3855,19 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
   // If we end up changing the VL or mask of True, then we need to make sure it
   // doesn't raise any observable fp exceptions, since changing the active
   // elements will affect how fflags is set.
-  if (TrueVL != VL || !IsMasked)
+  if (TrueVL != VL || !IsMasked) {
     if (mayRaiseFPException(True.getNode()) &&
         !True->getFlags().hasNoFPExcept())
       return false;
 
+    // If the EEW of True is different from vmerge's SEW, then we cannot change
+    // the VL or mask.
+    if (Log2_64(True.getSimpleValueType().getScalarSizeInBits()) !=
+        N->getConstantOperandVal(
+            RISCVII::getSEWOpNum(TII->get(N->getMachineOpcode())) - 1))
+      return false;
+  }
+
   SDLoc DL(N);
 
   // From the preconditions we checked above, we know the mask and thus glue
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index ddf83d87cea6c..c0fb675cb991f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1200,8 +1200,10 @@ define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(
 define <vscale x 2 x i32> @unfoldable_mismatched_sew_mask(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
 ; CHECK-LABEL: unfoldable_mismatched_sew_mask:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
   %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
@@ -1218,8 +1220,10 @@ define <vscale x 2 x i32> @unfoldable_mismatched_sew_mask(<vscale x 2 x i32> %pa
 define <vscale x 2 x i32> @unfoldable_mismatched_sew_avl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
 ; CHECK-LABEL: unfoldable_mismatched_sew_avl:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 3, e64, m1, tu, ma
-; CHECK-NEXT:    vadd.vv v8, v9, v10
+; CHECK-NEXT:    vsetivli zero, 5, e64, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 5)
   %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index 65ee91a0e1907..4940e652170ab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -184,8 +184,10 @@ define <vscale x 2 x i32> @unfoldable_vredsum(<vscale x 2 x i32> %passthru,
 define <vscale x 2 x i32> @unfoldable_mismatched_sew_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
 ; CHECK-LABEL: unfoldable_mismatched_sew_diff_vl:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 3, e64, m1, tu, ma
-; CHECK-NEXT:    vadd.vv v8, v9, v10
+; CHECK-NEXT:    vsetivli zero, 6, e64, m1, ta, ma
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen 6)
   %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>

From 019073717c5511af11c002d7c1b24f07cd2f952c Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 30 Jul 2024 17:48:48 +0800
Subject: [PATCH 3/4] Use getScalarValueSizeInBits

---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index fc0238e4892d6..d5f645e8d5158 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3862,7 +3862,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
 
     // If the EEW of True is different from vmerge's SEW, then we cannot change
     // the VL or mask.
-    if (Log2_64(True.getSimpleValueType().getScalarSizeInBits()) !=
+    if (Log2_64(True.getScalarValueSizeInBits()) !=
         N->getConstantOperandVal(
             RISCVII::getSEWOpNum(TII->get(N->getMachineOpcode())) - 1))
       return false;

From 70395f346e21697a8d3ba3239ac47c43e6013558 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 30 Jul 2024 23:37:43 +0800
Subject: [PATCH 4/4] Just check MVTs, bail in every case, remove redundant
 tests

---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   | 14 ++++-----
 .../RISCV/rvv/rvv-peephole-vmerge-vops.ll     | 29 ++++---------------
 .../CodeGen/RISCV/rvv/vmv.v.v-peephole.ll     | 12 ++++----
 3 files changed, 16 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index d5f645e8d5158..4de38db6e1fe9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3733,6 +3733,10 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
   assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
   assert(!Glue || Glue.getValueType() == MVT::Glue);
 
+  // If the EEW of True is different from vmerge's SEW, then we can't fold.
+  if (True.getSimpleValueType() != N->getSimpleValueType(0))
+    return false;
+
   // We require that either passthru and false are the same, or that passthru
   // is undefined.
   if (Passthru != False && !isImplicitDef(Passthru))
     return false;
@@ -3855,19 +3859,11 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
   // If we end up changing the VL or mask of True, then we need to make sure it
   // doesn't raise any observable fp exceptions, since changing the active
   // elements will affect how fflags is set.
-  if (TrueVL != VL || !IsMasked) {
+  if (TrueVL != VL || !IsMasked)
     if (mayRaiseFPException(True.getNode()) &&
         !True->getFlags().hasNoFPExcept())
       return false;
 
-    // If the EEW of True is different from vmerge's SEW, then we cannot change
-    // the VL or mask.
-    if (Log2_64(True.getScalarValueSizeInBits()) !=
-        N->getConstantOperandVal(
-            RISCVII::getSEWOpNum(TII->get(N->getMachineOpcode())) - 1))
-      return false;
-  }
-
   SDLoc DL(N);
 
   // From the preconditions we checked above, we know the mask and thus glue
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index c0fb675cb991f..259515f160048 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1197,42 +1197,23 @@ define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(
   ret <vscale x 2 x i32> %b
 }
 
-define <vscale x 2 x i32> @unfoldable_mismatched_sew_mask(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
-; CHECK-LABEL: unfoldable_mismatched_sew_mask:
+
+define <vscale x 2 x i32> @unfoldable_mismatched_sew(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
+; CHECK-LABEL: unfoldable_mismatched_sew:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    vadd.vv v9, v9, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
-; CHECK-NEXT:    ret
-  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
-  %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
-  %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
-    <vscale x 2 x i32> %passthru,
-    <vscale x 2 x i32> %passthru,
-    <vscale x 2 x i32> %a.bitcast,
-    <vscale x 2 x i1> %mask,
-    i64 %avl
-  )
-  ret <vscale x 2 x i32> %b
-}
-
-define <vscale x 2 x i32> @unfoldable_mismatched_sew_avl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
-; CHECK-LABEL: unfoldable_mismatched_sew_avl:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 5, e64, m1, ta, ma
-; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
-  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 5)
+  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
   %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
     <vscale x 2 x i32> %passthru,
     <vscale x 2 x i32> %passthru,
     <vscale x 2 x i32> %a.bitcast,
     <vscale x 2 x i1> splat (i1 true),
-    i64 3
+    i64 %avl
   )
   ret <vscale x 2 x i32> %b
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
index 4940e652170ab..3952e48c5c28f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -181,16 +181,16 @@ define <vscale x 2 x i32> @unfoldable_vredsum(<vscale x 2 x i32> %passthru,
   ret <vscale x 2 x i32> %b
 }
 
-define <vscale x 2 x i32> @unfoldable_mismatched_sew_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
-; CHECK-LABEL: unfoldable_mismatched_sew_diff_vl:
+define <vscale x 2 x i32> @unfoldable_mismatched_sew(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen %avl) {
+; CHECK-LABEL: unfoldable_mismatched_sew:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 6, e64, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
-  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen 6)
+  %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen %avl)
   %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
-  %b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a.bitcast, iXLen 3)
+  %b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a.bitcast, iXLen %avl)
   ret <vscale x 2 x i32> %b
 }