-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[RISCV] Add a high half PACKW+PACK pattern for RV64. #152760
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Similar to the PACKH+PACK pattern for RV32. We can end up with the shift left by 32 needed by our PACK pattern hidden behind an OR that packs 2 half words.
|
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: Similar to the PACKH+PACK pattern for RV32. We can end up with the shift left by 32 needed by our PACK pattern hidden behind an OR that packs 2 half words. Full diff: https://github.com/llvm/llvm-project/pull/152760.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 413ad8b2f9cc9..0b178ae365ac7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -692,6 +692,16 @@ def : Pat<(binop_allwusers<or>
(shl GPR:$op1rs1, (XLenVT 24))),
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
(PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+
+// Match a pattern of 2 halfwords being inserted into bits [63:32], with
+// bits [31:0] coming from a zero extended value. We can use pack with packw for
+// bits [63:32]. If bits [31:0] can also be a packw, it can be matched
+// separately.
+def : Pat<(or (or (shl GPR:$op1rs2, (i64 48)),
+ (shl (zexti16 (i64 GPR:$op1rs1)), (i64 32))),
+ (zexti32 (i64 GPR:$rs1))),
+ (PACK (XLenVT GPR:$rs1),
+ (XLenVT (PACKW GPR:$op1rs1, GPR:$op1rs2)))>;
} // Predicates = [HasStdExtZbkb, IsRV64]
let Predicates = [HasStdExtZbb, IsRV32] in
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index cb046cdaae75c..b396d16b0eb26 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -232,6 +232,69 @@ define i64 @load_i64(ptr %p) {
ret i64 %res
}
+define i64 @load_i64_align2(ptr %p) {
+; RV32I-LABEL: load_i64_align2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lhu a1, 2(a0)
+; RV32I-NEXT: lhu a2, 0(a0)
+; RV32I-NEXT: lhu a3, 6(a0)
+; RV32I-NEXT: lhu a4, 4(a0)
+; RV32I-NEXT: slli a0, a1, 16
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: slli a1, a3, 16
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: load_i64_align2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lhu a1, 2(a0)
+; RV64I-NEXT: lhu a2, 0(a0)
+; RV64I-NEXT: lhu a3, 4(a0)
+; RV64I-NEXT: lhu a0, 6(a0)
+; RV64I-NEXT: slli a1, a1, 16
+; RV64I-NEXT: or a1, a1, a2
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32IZBKB-LABEL: load_i64_align2:
+; RV32IZBKB: # %bb.0:
+; RV32IZBKB-NEXT: lhu a1, 0(a0)
+; RV32IZBKB-NEXT: lhu a2, 2(a0)
+; RV32IZBKB-NEXT: lhu a3, 4(a0)
+; RV32IZBKB-NEXT: lhu a4, 6(a0)
+; RV32IZBKB-NEXT: pack a0, a1, a2
+; RV32IZBKB-NEXT: pack a1, a3, a4
+; RV32IZBKB-NEXT: ret
+;
+; RV64IZBKB-LABEL: load_i64_align2:
+; RV64IZBKB: # %bb.0:
+; RV64IZBKB-NEXT: lhu a1, 2(a0)
+; RV64IZBKB-NEXT: lhu a2, 4(a0)
+; RV64IZBKB-NEXT: lhu a3, 6(a0)
+; RV64IZBKB-NEXT: lhu a0, 0(a0)
+; RV64IZBKB-NEXT: packw a2, a2, a3
+; RV64IZBKB-NEXT: packw a0, a0, a1
+; RV64IZBKB-NEXT: pack a0, a0, a2
+; RV64IZBKB-NEXT: ret
+;
+; RV32I-FAST-LABEL: load_i64_align2:
+; RV32I-FAST: # %bb.0:
+; RV32I-FAST-NEXT: lw a2, 0(a0)
+; RV32I-FAST-NEXT: lw a1, 4(a0)
+; RV32I-FAST-NEXT: mv a0, a2
+; RV32I-FAST-NEXT: ret
+;
+; RV64I-FAST-LABEL: load_i64_align2:
+; RV64I-FAST: # %bb.0:
+; RV64I-FAST-NEXT: ld a0, 0(a0)
+; RV64I-FAST-NEXT: ret
+ %res = load i64, ptr %p, align 2
+ ret i64 %res
+}
+
define void @store_i8(ptr %p, i8 %v) {
; ALL-LABEL: store_i8:
; ALL: # %bb.0:
@@ -569,8 +632,8 @@ define void @store_large_constant(ptr %x) {
;
; RV64I-FAST-LABEL: store_large_constant:
; RV64I-FAST: # %bb.0:
-; RV64I-FAST-NEXT: lui a1, %hi(.LCPI16_0)
-; RV64I-FAST-NEXT: ld a1, %lo(.LCPI16_0)(a1)
+; RV64I-FAST-NEXT: lui a1, %hi(.LCPI17_0)
+; RV64I-FAST-NEXT: ld a1, %lo(.LCPI17_0)(a1)
; RV64I-FAST-NEXT: sd a1, 0(a0)
; RV64I-FAST-NEXT: ret
store i64 18364758544493064720, ptr %x, align 1
|
wangpc-pp
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Similar to the PACKH+PACK pattern for RV32. We can end up with the
shift left by 32 needed by our PACK pattern hidden behind an OR that
packs 2 half words.