diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 01fa418e4dbdf..bfabf87811c8f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13572,8 +13572,10 @@ static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
 }
 
-// Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are
-// non-zero. Replace with orc.b.
+// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
+// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
+// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
+// valid with Y=3, while 0b0000_1000_0000_0100 is not.
 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
   if (!Subtarget.hasStdExtZbb())
@@ -13587,18 +13589,44 @@ static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
-  if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())
+  if (N0->getOpcode() != ISD::SHL)
     return SDValue();
 
-  auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-  if (!ShAmtC || ShAmtC->getZExtValue() != 8)
+  auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  if (!ShAmtCLeft)
     return SDValue();
+  unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
 
-  APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));
-  if (!DAG.MaskedValueIsZero(N1, Mask))
+  if (ShiftedAmount >= 8)
     return SDValue();
 
-  return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);
+  SDValue LeftShiftOperand = N0->getOperand(0);
+  SDValue RightShiftOperand = N1;
+
+  if (ShiftedAmount != 0) { // Right operand must be a right shift.
+    if (N1->getOpcode() != ISD::SRL)
+      return SDValue();
+    auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+    if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
+      return SDValue();
+    RightShiftOperand = N1.getOperand(0);
+  }
+
+  // At least one shift should have a single use.
+  if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
+    return SDValue();
+
+  if (LeftShiftOperand != RightShiftOperand)
+    return SDValue();
+
+  APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
+  Mask <<= ShiftedAmount;
+  // Check that X has indeed the right shape (only the Y-th bit can be set in
+  // every byte).
+  if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
+    return SDValue();
+
+  return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
 }
 
 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
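Reviewer note, not part of the patch: the rewrite rests on a small identity. When only bit Y of each byte of X can be non-zero, a set byte B contributes 2^(8B+Y), so (X << (8-Y)) - (X >> Y) turns each set byte into 2^(8B+8) - 2^(8B) = 0xFF << 8B, with the top byte's overflow handled by wrap-around modulo 2^N -- exactly the per-byte fill that orc.b performs. The standalone C++ sketch below checks this exhaustively for i32; referenceOrcB is a helper made up for this note, not an LLVM API.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Reference semantics of Zbb's orc.b: each result byte is 0xFF if the
// corresponding input byte is non-zero and 0x00 otherwise.
static uint32_t referenceOrcB(uint32_t X) {
  uint32_t R = 0;
  for (int B = 0; B < 4; ++B)
    if ((X >> (8 * B)) & 0xFF)
      R |= 0xFFu << (8 * B);
  return R;
}

int main() {
  for (unsigned Y = 0; Y < 8; ++Y) {
    // Enumerate every X whose bytes are either 0 or (1 << Y), i.e. every
    // value that passes the MaskedValueIsZero(X, ~Mask) shape check above.
    for (unsigned Bytes = 0; Bytes < 16; ++Bytes) {
      uint32_t X = 0;
      for (int B = 0; B < 4; ++B)
        if (Bytes & (1u << B))
          X |= (1u << Y) << (8 * B);
      // Unsigned arithmetic wraps, matching the two's complement ISD::SUB.
      uint32_t Sub = (X << (8 - Y)) - (X >> Y);
      assert(Sub == referenceOrcB(X));
    }
  }
  puts("(sub (shl X, 8-Y), (srl X, Y)) == (orc.b X) for all Y in [0,7]");
  return 0;
}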
diff --git a/llvm/test/CodeGen/RISCV/orc-b-patterns.ll b/llvm/test/CodeGen/RISCV/orc-b-patterns.ll
new file mode 100644
index 0000000000000..184e66c14b33f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/orc-b-patterns.ll
@@ -0,0 +1,372 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=CHECK,RV32ZBB
+
+define i32 @orc_b_i32_mul255(i32 %x) nounwind {
+; RV32I-LABEL: orc_b_i32_mul255:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_mul255:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a1, 4112
+; RV32ZBB-NEXT:    addi a1, a1, 257
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 16843009
+  %mul = mul nuw nsw i32 %and, 255
+  ret i32 %mul
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_lsb(i32 %x) {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_lsb:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a1, 4112
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_lsb:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a1, 4112
+; RV32ZBB-NEXT:    addi a1, a1, 257
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 16843009
+  %sub = mul nuw i32 %and, 255
+  ret i32 %sub
+}
+
+define i32 @orc_b_i32_sub_shl8x_x_lsb_preshifted(i32 %x){
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_lsb_preshifted:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srli a0, a0, 11
+; RV32I-NEXT:    lui a1, 16
+; RV32I-NEXT:    addi a1, a1, 257
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_lsb_preshifted:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    srli a0, a0, 11
+; RV32ZBB-NEXT:    lui a1, 16
+; RV32ZBB-NEXT:    addi a1, a1, 257
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %shr = lshr i32 %x, 11
+  %and = and i32 %shr, 16843009
+  %sub = mul nuw i32 %and, 255
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b1(i32 %x) {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_b1:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a1, 8224
+; RV32I-NEXT:    addi a1, a1, 514
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 7
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b1:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a1, 8224
+; RV32ZBB-NEXT:    addi a1, a1, 514
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 33686018
+  %shl = shl i32 %and, 7
+  %shr = lshr exact i32 %and, 1
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b2(i32 %x) {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_b2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a1, 16448
+; RV32I-NEXT:    addi a1, a1, 1028
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 6
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b2:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a1, 16448
+; RV32ZBB-NEXT:    addi a1, a1, 1028
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 67372036
+  %shl = shl i32 %and, 6
+  %shr = lshr exact i32 %and, 2
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b3(i32 %x) {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 24672
+; CHECK-NEXT:    addi a1, a1, 1542
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 5
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 101058054
+  %shl = shl nuw i32 %and, 5
+  %shr = lshr i32 %and, 3
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b4(i32 %x) {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 32897
+; CHECK-NEXT:    addi a1, a1, -2040
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    srli a0, a0, 4
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 134744072
+  %shl = shl nuw i32 %and, 4
+  %shr = lshr i32 %and, 4
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b5(i32 %x) {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 65793
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    srli a0, a0, 5
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 269488144
+  %shl = shl nuw i32 %and, 3
+  %shr = lshr i32 %and, 5
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b6(i32 %x) {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 131586
+; CHECK-NEXT:    addi a1, a1, 32
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 2
+; CHECK-NEXT:    srli a0, a0, 6
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 538976288
+  %shl = shl nuw i32 %and, 2
+  %shr = lshr i32 %and, 6
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b7(i32 %x) {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 263172
+; CHECK-NEXT:    addi a1, a1, 64
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    slli a1, a0, 1
+; CHECK-NEXT:    srli a0, a0, 7
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 1077952576
+  %shl = shl nuw i32 %and, 1
+  %shr = lshr i32 %and, 7
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
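Reviewer note, not part of the patch: @orc_b_i32_sub_shl8x_x_b3 through _b7 above are negative tests -- their AND masks either allow more than one bit per byte (_b3) or a bit that does not match the shift pair (_b4 to _b7), so the MaskedValueIsZero guard rejects them and the shl/srl/sub sequence survives. A minimal sketch of that guard for i32, with canUseOrcB being a name made up for this note:

#include <cassert>
#include <cstdint>

// PossiblySetBits: bits of X that may be non-zero (the AND constant in the
// tests above). The pair (shl by 8-Y, srl by Y) may fold to orc.b only if
// every possibly-set bit is bit Y of its byte.
static bool canUseOrcB(uint32_t PossiblySetBits, unsigned Y) {
  uint32_t Allowed = 0x01010101u << Y; // splat of (1 << Y) across all bytes
  return (PossiblySetBits & ~Allowed) == 0;
}

int main() {
  assert(canUseOrcB(0x02020202u, 1));  // _b1: bit 1 per byte, shl 7 / srl 1
  assert(!canUseOrcB(0x06060606u, 3)); // _b3: bits 1-2 per byte -> rejected
  assert(!canUseOrcB(0x08080808u, 4)); // _b4: bit 3 per byte, but Y == 4
  return 0;
}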
+
+define i32 @orc_b_i32_sub_shl8x_x_b1_shl_used(i32 %x, ptr %arr) {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_b1_shl_used:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a2, 8224
+; RV32I-NEXT:    addi a2, a2, 514
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 7
+; RV32I-NEXT:    srli a3, a0, 1
+; RV32I-NEXT:    sub a0, a2, a3
+; RV32I-NEXT:    sw a3, 0(a1)
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b1_shl_used:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a2, 8224
+; RV32ZBB-NEXT:    addi a2, a2, 514
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    srli a2, a0, 1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    sw a2, 0(a1)
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 33686018
+  %shl = shl i32 %and, 7
+  %shr = lshr exact i32 %and, 1
+  store i32 %shr, ptr %arr, align 4
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+define i32 @orc_b_i32_sub_shl8x_x_b1_srl_used(i32 %x, ptr %arr) {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_b1_srl_used:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a2, 8224
+; RV32I-NEXT:    addi a2, a2, 514
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a2, a0, 7
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    sub a0, a2, a0
+; RV32I-NEXT:    sw a2, 0(a1)
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b1_srl_used:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a2, 8224
+; RV32ZBB-NEXT:    addi a2, a2, 514
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a2, a0, 7
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    sw a2, 0(a1)
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 33686018
+  %shl = shl i32 %and, 7
+  %shr = lshr exact i32 %and, 1
+  store i32 %shl, ptr %arr, align 4
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_shl8x_x_b1_not_used(i32 %x, ptr %arr) {
+; RV32I-LABEL: orc_b_i32_sub_shl8x_x_b1_not_used:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    lui a1, 8224
+; RV32I-NEXT:    addi a1, a1, 514
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 7
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b1_not_used:
+; RV32ZBB:       # %bb.0: # %entry
+; RV32ZBB-NEXT:    lui a1, 8224
+; RV32ZBB-NEXT:    addi a1, a1, 514
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+entry:
+  %and = and i32 %x, 33686018
+  %shl = shl i32 %and, 7
+  %shr = lshr exact i32 %and, 1
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+define i32 @orc_b_i32_sub_shl8x_x_shl_used(i32 %x, ptr %arr){
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_shl_used:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a2, 4112
+; CHECK-NEXT:    addi a2, a2, 257
+; CHECK-NEXT:    and a0, a0, a2
+; CHECK-NEXT:    slli a2, a0, 8
+; CHECK-NEXT:    sub a0, a2, a0
+; CHECK-NEXT:    sw a2, 0(a1)
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 16843009
+  %shl = shl i32 %and, 8
+  store i32 %shl, ptr %arr, align 4
+  %sub = mul nuw i32 %and, 255
+  ret i32 %sub
+}
+
+define i32 @orc_b_i32_sub_shl8x_x_b1_both_used(i32 %x, ptr %arr) {
+; CHECK-LABEL: orc_b_i32_sub_shl8x_x_b1_both_used:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a2, 8224
+; CHECK-NEXT:    addi a2, a2, 514
+; CHECK-NEXT:    and a0, a0, a2
+; CHECK-NEXT:    slli a2, a0, 7
+; CHECK-NEXT:    srli a3, a0, 1
+; CHECK-NEXT:    sw a2, 0(a1)
+; CHECK-NEXT:    sub a0, a2, a3
+; CHECK-NEXT:    sw a3, 4(a1)
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 33686018
+  %shl = shl i32 %and, 7
+  %shr = lshr exact i32 %and, 1
+  store i32 %shl, ptr %arr, align 4
+  %arrayidx1 = getelementptr inbounds i8, ptr %arr, i32 4
+  store i32 %shr, ptr %arrayidx1, align 4
+  %sub = sub nsw i32 %shl, %shr
+  ret i32 %sub
+}
+
+
+define i32 @orc_b_i32_sub_x_shr8x(i32 %x) {
+; CHECK-LABEL: orc_b_i32_sub_x_shr8x:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a1, 4112
+; CHECK-NEXT:    addi a1, a1, 257
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    srli a1, a0, 8
+; CHECK-NEXT:    sub a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %and = and i32 %x, 16843009
+  %shr = lshr i32 %and, 8
+  %sub = sub nsw i32 %and, %shr
+  ret i32 %sub
+}
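Reviewer note, not part of the patch: as a user-level illustration of what the combine buys, the C++ analogue of @orc_b_i32_mul255 above -- the function name is made up for the example -- should now compile to and + orc.b on RV32 with -mattr=+zbb, instead of and/slli/sub:

#include <cstdint>

// Spread the least significant bit of each byte across that whole byte:
// a byte of 0x01 becomes 0xFF, a byte of 0x00 stays 0x00. The multiply by
// 255 is folded to (x << 8) - x in the DAG, which this patch then selects
// as a single orc.b.
uint32_t fill_bytes_from_lsb(uint32_t x) {
  x &= 0x01010101u; // keep only bit 0 of every byte (16843009)
  return x * 255u;
}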