-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[LLVM][CodeGen][SME] Only emit strided loads in streaming mode. #150445
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LLVM][CodeGen][SME] Only emit strided loads in streaming mode. #150445
Conversation
|
@llvm/pr-subscribers-backend-aarch64 Author: Paul Walker (paulwalker-arm) ChangesThe selection code for aarch64_sve_ld[nt]1_pn_x{2,4} intrinsics gates the use of strided load instructions behind the SME2 target feature. However, the instructions are only available in streaming mode. Full diff: https://github.com/llvm/llvm-project/pull/150445.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index eca7ca566cfc2..ad42f4b56caf2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -5296,7 +5296,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
case Intrinsic::aarch64_sve_ld1_pn_x2: {
if (VT == MVT::nxv16i8) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(
Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
else if (Subtarget->hasSVE2p1())
@@ -5307,7 +5307,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(
Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
else if (Subtarget->hasSVE2p1())
@@ -5317,7 +5317,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(
Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
else if (Subtarget->hasSVE2p1())
@@ -5327,7 +5327,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(
Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
else if (Subtarget->hasSVE2p1())
@@ -5341,7 +5341,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
case Intrinsic::aarch64_sve_ld1_pn_x4: {
if (VT == MVT::nxv16i8) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(
Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
else if (Subtarget->hasSVE2p1())
@@ -5352,7 +5352,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(
Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
else if (Subtarget->hasSVE2p1())
@@ -5362,7 +5362,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(
Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
else if (Subtarget->hasSVE2p1())
@@ -5372,7 +5372,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(
Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
else if (Subtarget->hasSVE2p1())
@@ -5386,7 +5386,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
if (VT == MVT::nxv16i8) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(Node, 2, 0,
AArch64::LDNT1B_2Z_IMM_PSEUDO,
AArch64::LDNT1B_2Z_PSEUDO);
@@ -5398,7 +5398,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(Node, 2, 1,
AArch64::LDNT1H_2Z_IMM_PSEUDO,
AArch64::LDNT1H_2Z_PSEUDO);
@@ -5409,7 +5409,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(Node, 2, 2,
AArch64::LDNT1W_2Z_IMM_PSEUDO,
AArch64::LDNT1W_2Z_PSEUDO);
@@ -5420,7 +5420,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(Node, 2, 3,
AArch64::LDNT1D_2Z_IMM_PSEUDO,
AArch64::LDNT1D_2Z_PSEUDO);
@@ -5435,7 +5435,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
if (VT == MVT::nxv16i8) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(Node, 4, 0,
AArch64::LDNT1B_4Z_IMM_PSEUDO,
AArch64::LDNT1B_4Z_PSEUDO);
@@ -5447,7 +5447,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(Node, 4, 1,
AArch64::LDNT1H_4Z_IMM_PSEUDO,
AArch64::LDNT1H_4Z_PSEUDO);
@@ -5458,7 +5458,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(Node, 4, 2,
AArch64::LDNT1W_4Z_IMM_PSEUDO,
AArch64::LDNT1W_4Z_PSEUDO);
@@ -5469,7 +5469,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
- if (Subtarget->hasSME2())
+ if (Subtarget->hasSME2() && Subtarget->isStreaming())
SelectContiguousMultiVectorLoad(Node, 4, 3,
AArch64::LDNT1D_4Z_IMM_PSEUDO,
AArch64::LDNT1D_4Z_PSEUDO);
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll
index c63899cf7d257..19ac03d1200b7 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s --check-prefixes=STRIDED
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CONTIGUOUS
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+sme2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CONTIGUOUS
define <vscale x 32 x i8> @ld1_x2_i8_z0_z8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %z1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: ld1_x2_i8_z0_z8:
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this line be added to sme2-intrinsics-ldnt1.ll as well?
The selection code for aarch64_sve_ld[nt]1_pn_x{2,4} intrinsics gates
the use of strided load instructions behind the SME2 target feature.
However, the instructions are only available in streaming mode.
3760a2a to
e99475c
Compare
The selection code for aarch64_sve_ld[nt]1_pn_x{2,4} intrinsics gates the use of strided load instructions behind the SME2 target feature. However, the instructions are only available in streaming mode.