Skip to content

Conversation

@topperc
Copy link
Collaborator

@topperc topperc commented Aug 8, 2025

Similar to the PACKH+PACK pattern for RV32. We can end up with the
shift left by 32 needed by our PACK pattern hidden behind an OR that
packs 2 half words.

topperc added 2 commits August 8, 2025 08:52
Similar to the PACKH+PACK pattern for RV32. We can end up with the
shift left by 32 needed by our PACK pattern hidden behind an OR that
packs 2 half words.
@llvmbot
Copy link
Member

llvmbot commented Aug 8, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

Similar to the PACKH+PACK pattern for RV32. We can end up with the
shift left by 32 needed by our PACK pattern hidden behind an OR that
packs 2 half words.


Full diff: https://github.com/llvm/llvm-project/pull/152760.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoZb.td (+10)
  • (modified) llvm/test/CodeGen/RISCV/unaligned-load-store.ll (+65-2)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 413ad8b2f9cc9..0b178ae365ac7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -692,6 +692,16 @@ def : Pat<(binop_allwusers<or>
                    (shl GPR:$op1rs1, (XLenVT 24))),
                (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))),
           (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>;
+
+// Match a pattern of 2 halfwords being inserted into bits [63:32], with
+// bits [31:0] coming from a zero extended value. We can use pack with packw for
+// bits [63:32]. If bits [31:0] can also be a packw, it can be matched
+// separately.
+def : Pat<(or (or (shl GPR:$op1rs2, (i64 48)),
+                  (shl (zexti16 (i64 GPR:$op1rs1)), (i64 32))),
+              (zexti32 (i64 GPR:$rs1))),
+          (PACK (XLenVT GPR:$rs1),
+                (XLenVT (PACKW GPR:$op1rs1, GPR:$op1rs2)))>;
 } // Predicates = [HasStdExtZbkb, IsRV64]
 
 let Predicates = [HasStdExtZbb, IsRV32] in
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index cb046cdaae75c..b396d16b0eb26 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -232,6 +232,69 @@ define i64 @load_i64(ptr %p) {
   ret i64 %res
 }
 
+define i64 @load_i64_align2(ptr %p) {
+; RV32I-LABEL: load_i64_align2:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lhu a1, 2(a0)
+; RV32I-NEXT:    lhu a2, 0(a0)
+; RV32I-NEXT:    lhu a3, 6(a0)
+; RV32I-NEXT:    lhu a4, 4(a0)
+; RV32I-NEXT:    slli a0, a1, 16
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    slli a1, a3, 16
+; RV32I-NEXT:    or a1, a1, a4
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: load_i64_align2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lhu a1, 2(a0)
+; RV64I-NEXT:    lhu a2, 0(a0)
+; RV64I-NEXT:    lhu a3, 4(a0)
+; RV64I-NEXT:    lhu a0, 6(a0)
+; RV64I-NEXT:    slli a1, a1, 16
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a3, a3, 32
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32IZBKB-LABEL: load_i64_align2:
+; RV32IZBKB:       # %bb.0:
+; RV32IZBKB-NEXT:    lhu a1, 0(a0)
+; RV32IZBKB-NEXT:    lhu a2, 2(a0)
+; RV32IZBKB-NEXT:    lhu a3, 4(a0)
+; RV32IZBKB-NEXT:    lhu a4, 6(a0)
+; RV32IZBKB-NEXT:    pack a0, a1, a2
+; RV32IZBKB-NEXT:    pack a1, a3, a4
+; RV32IZBKB-NEXT:    ret
+;
+; RV64IZBKB-LABEL: load_i64_align2:
+; RV64IZBKB:       # %bb.0:
+; RV64IZBKB-NEXT:    lhu a1, 2(a0)
+; RV64IZBKB-NEXT:    lhu a2, 4(a0)
+; RV64IZBKB-NEXT:    lhu a3, 6(a0)
+; RV64IZBKB-NEXT:    lhu a0, 0(a0)
+; RV64IZBKB-NEXT:    packw a2, a2, a3
+; RV64IZBKB-NEXT:    packw a0, a0, a1
+; RV64IZBKB-NEXT:    pack a0, a0, a2
+; RV64IZBKB-NEXT:    ret
+;
+; RV32I-FAST-LABEL: load_i64_align2:
+; RV32I-FAST:       # %bb.0:
+; RV32I-FAST-NEXT:    lw a2, 0(a0)
+; RV32I-FAST-NEXT:    lw a1, 4(a0)
+; RV32I-FAST-NEXT:    mv a0, a2
+; RV32I-FAST-NEXT:    ret
+;
+; RV64I-FAST-LABEL: load_i64_align2:
+; RV64I-FAST:       # %bb.0:
+; RV64I-FAST-NEXT:    ld a0, 0(a0)
+; RV64I-FAST-NEXT:    ret
+  %res = load i64, ptr %p, align 2
+  ret i64 %res
+}
+
 define void @store_i8(ptr %p, i8 %v) {
 ; ALL-LABEL: store_i8:
 ; ALL:       # %bb.0:
@@ -569,8 +632,8 @@ define void @store_large_constant(ptr %x) {
 ;
 ; RV64I-FAST-LABEL: store_large_constant:
 ; RV64I-FAST:       # %bb.0:
-; RV64I-FAST-NEXT:    lui a1, %hi(.LCPI16_0)
-; RV64I-FAST-NEXT:    ld a1, %lo(.LCPI16_0)(a1)
+; RV64I-FAST-NEXT:    lui a1, %hi(.LCPI17_0)
+; RV64I-FAST-NEXT:    ld a1, %lo(.LCPI17_0)(a1)
 ; RV64I-FAST-NEXT:    sd a1, 0(a0)
 ; RV64I-FAST-NEXT:    ret
   store i64 18364758544493064720, ptr %x, align 1

Copy link
Contributor

@wangpc-pp wangpc-pp left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@topperc topperc merged commit f55281a into llvm:main Aug 11, 2025
9 checks passed
@topperc topperc deleted the pr/packw-pack branch August 11, 2025 12:38
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants