AMDGPU: Fold 64-bit immediate into copy to AV class #155615
Merged: arsenm merged 4 commits into main from users/arsenm/amdgpu/fold-s-mov-imm-64-into-av-copy on Sep 3, 2025
Conversation
@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes: This is in preparation for patches which will introduce more copies to AV registers.

Full diff: https://github.com/llvm/llvm-project/pull/155615.diff

5 Files Affected:
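In MIR terms, the fold this patch enables looks like this (distilled from the tests in the diff below; register numbers are illustrative):

    # Before SIFoldOperands:
    %0:sreg_64 = S_MOV_B64 0, implicit $exec
    %1:areg_64 = COPY %0

    # After: the immediate is folded directly into the AV-class def.
    %1:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec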
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 268b153c6c924..150e05b59c29f 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -237,16 +237,16 @@ enum OperandType : unsigned {
OPERAND_REG_INLINE_AC_FP32,
OPERAND_REG_INLINE_AC_FP64,
+ // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
+ // constants. Does not accept registers.
+ OPERAND_INLINE_C_AV64_PSEUDO,
+
// Operand for source modifiers for VOP instructions
OPERAND_INPUT_MODS,
// Operand for SDWA instructions
OPERAND_SDWA_VOPC_DST,
- // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
- // constants.
- OPERAND_INLINE_C_AV64_PSEUDO,
-
OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
@@ -254,7 +254,7 @@ enum OperandType : unsigned {
OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64,
OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32,
- OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_FP64,
+ OPERAND_REG_INLINE_AC_LAST = OPERAND_INLINE_C_AV64_PSEUDO,
OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
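The reshuffle in this hunk matters because operand kinds are classified with range checks over this enum; moving OPERAND_INLINE_C_AV64_PSEUDO ahead of the AC operands and widening OPERAND_REG_INLINE_AC_LAST makes the generic inline-constant paths accept it. A minimal sketch of the idiom, with a hypothetical helper name (the real checks live in SIInstrInfo):

    // Hypothetical helper: range-check classification enabled by the
    // enum reordering above.
    static bool isInlineACOperandType(unsigned OpType) {
      return OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
             OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST;
             // ...which now covers OPERAND_INLINE_C_AV64_PSEUDO
    }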
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a116b57c85a88..92eaa8b29ccb8 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1296,7 +1296,8 @@ void SIFoldOperandsImpl::foldOperand(
for (unsigned MovOp :
{AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
- AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {
+ AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
+ AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
const MCInstrDesc &MovDesc = TII->get(MovOp);
assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
@@ -1312,11 +1313,23 @@ void SIFoldOperandsImpl::foldOperand(
const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
const TargetRegisterClass *MovSrcRC =
TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);
-
- if (UseSubReg)
- MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
- if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
- break;
+ if (MovSrcRC) {
+ if (UseSubReg)
+ MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
+ if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
+ break;
+
+ // FIXME: This is mutating the instruction only and deferring the actual
+ // fold of the immediate
+ } else {
+ // For the _IMM_PSEUDO cases, there can be value restrictions on the
+ // immediate to verify. Technically we should always verify this, but it
+ // only matters for these concrete cases.
+ // TODO: Handle non-imm case if it's useful.
+ if (!OpToFold.isImm() ||
+ !TII->isImmOperandLegal(MovDesc, 1, *OpToFold.getEffectiveImmVal()))
+ break;
+ }
MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 47236e9d49f8c..c0b994432948b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3444,12 +3444,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
case AMDGPU::V_ACCVGPR_READ_B32_e64:
case AMDGPU::V_ACCVGPR_MOV_B32:
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
- return true;
case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
- // TODO: We could fold this, but it's a strange case. The immediate value
- // can't be directly folded into any real use. We would have to spread new
- // immediate legality checks around and only accept subregister extracts for
- // profitability.
+ return true;
default:
return false;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
index 6f2e33900a79a..73cdcddbef135 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
@@ -7,9 +7,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_areg_64
- ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]]
- ; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+ ; GCN-NEXT: $agpr0_agpr1 = COPY [[AV_MOV_]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
%1:areg_64_align2 = COPY %0
@@ -24,9 +23,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: v_mov_b64_pseudo_imm_neg1_copy_to_areg_64
- ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]]
- ; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -1, implicit $exec
+ ; GCN-NEXT: $agpr0_agpr1 = COPY [[AV_MOV_]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec
%1:areg_64_align2 = COPY %0
@@ -125,9 +123,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_av_64
- ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[V_MOV_B]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
%1:av_64_align2 = COPY %0
S_ENDPGM 0, implicit %1
@@ -226,9 +223,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64
- ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64 0, implicit $exec
%1:areg_64 = COPY %0
S_ENDPGM 0, implicit %1
@@ -241,9 +237,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64_align2
- ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64 0, implicit $exec
%1:areg_64_align2 = COPY %0
S_ENDPGM 0, implicit %1
@@ -256,9 +251,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64
- ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64 -16, implicit $exec
%1:areg_64 = COPY %0
S_ENDPGM 0, implicit %1
@@ -271,9 +265,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64_align2
- ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64 -16, implicit $exec
%1:areg_64_align2 = COPY %0
S_ENDPGM 0, implicit %1
@@ -286,9 +279,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64
- ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64 0, implicit $exec
%1:av_64 = COPY %0
S_ENDPGM 0, implicit %1
@@ -301,9 +293,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64_align2
- ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64 0, implicit $exec
%1:av_64_align2 = COPY %0
S_ENDPGM 0, implicit %1
@@ -316,9 +307,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64
- ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64 -16, implicit $exec
%1:av_64 = COPY %0
S_ENDPGM 0, implicit %1
@@ -331,9 +321,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64_align2
- ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64 -16, implicit $exec
%1:av_64_align2 = COPY %0
S_ENDPGM 0, implicit %1
@@ -346,9 +335,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64
- ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
%1:areg_64 = COPY %0
S_ENDPGM 0, implicit %1
@@ -361,9 +349,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64_align2
- ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
%1:areg_64_align2 = COPY %0
S_ENDPGM 0, implicit %1
@@ -376,9 +363,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64
- ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
%1:areg_64 = COPY %0
S_ENDPGM 0, implicit %1
@@ -391,9 +377,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64_align2
- ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
%1:areg_64_align2 = COPY %0
S_ENDPGM 0, implicit %1
@@ -406,9 +391,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64
- ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
%1:av_64 = COPY %0
S_ENDPGM 0, implicit %1
@@ -421,9 +405,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64_align2
- ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
%1:av_64_align2 = COPY %0
S_ENDPGM 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index ddf2aa34ecd87..dfcf9a1f5c5ae 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -816,9 +816,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_agpr
- ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[AV_MOV_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
%0:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
%1:areg_64 = COPY %0
S_ENDPGM 0, implicit %1
@@ -832,9 +831,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_0
- ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[AV_MOV_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]]
%0:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
%1:vreg_64 = COPY %0
S_ENDPGM 0, implicit %1
@@ -848,9 +846,9 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value
- ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[AV_MOV_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[AV_MOV_:%[0-9]+]]:vreg_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO [[AV_MOV_]], implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]]
%0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
%1:vreg_64 = COPY %0
S_ENDPGM 0, implicit %1
@@ -863,9 +861,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value_copy_sub0
- ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]].sub0
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 17, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
%0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
%1:vgpr_32 = COPY %0.sub0
S_ENDPGM 0, implicit %1
@@ -878,9 +875,8 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value_copy_sub1
- ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]].sub1
- ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 64, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
%0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
%1:vgpr_32 = COPY %0.sub1
S_ENDPGM 0, implicit %1
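One detail worth noting in the nonsplat tests above: per the new comment in SIDefines.h, the operand of AV_MOV_B64_IMM_PSEUDO is a pair of 32-bit inline constants packed into one 64-bit value, which is why a subregister copy can fold each half independently. For 274877906961 (0x4000000011) the halves are 17 and 64, matching the V_MOV_B32_e32 results. A sketch of the split (illustrative, not the in-tree code):

    uint64_t Imm = 274877906961;                    // 0x4000000011
    uint32_t Lo = static_cast<uint32_t>(Imm);       // 17 -> folded for the COPY of .sub0
    uint32_t Hi = static_cast<uint32_t>(Imm >> 32); // 64 -> folded for the COPY of .sub1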
chrisjbris reviewed Aug 27, 2025
Force-pushed e6418df to 49d1410
Force-pushed 6aaa543 to e08813a
Sisyph approved these changes Aug 27, 2025
Force-pushed 49d1410 to bc2070b
Force-pushed 2ee13ab to 9690466
Force-pushed bc2070b to 8961a0c
This loop over all the operands in the MachineInstr will eventually go past the end of the MCInstrDesc's explicit operands. We don't need the instr desc to compute the constant bus usage, just the register and whether it's implicit or not. The check here is slightly conservative: e.g., a random vcc implicit use appended to an instruction will falsely report a constant bus use.
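A rough sketch of the reworked computation this commit describes (the names and shape are illustrative, not the exact in-tree code):

    // Illustrative: walk the MachineInstr's own operand list instead of
    // indexing into the MCInstrDesc, which only describes explicit operands.
    static unsigned countConstantBusUses(const MachineInstr &MI,
                                         const SIRegisterInfo &TRI) {
      const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
      unsigned Count = 0;
      for (const MachineOperand &MO : MI.operands()) {
        if (MO.isReg() && MO.isUse() && TRI.isSGPRReg(MRI, MO.getReg()))
          ++Count; // implicit uses (e.g. a stray $vcc) count too, hence the
                   // conservatism noted above
      }
      return Count;
    }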
The goal is to expose more variants that can operate without preconstructed MachineInstrs or MachineOperands.
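Concretely, that is what lets the SIFoldOperands change above check legality from just a descriptor and a raw value, mirroring the call in the diff (the surrounding code here is illustrative):

    // No preconstructed MachineInstr/MachineOperand needed:
    const MCInstrDesc &MovDesc = TII->get(AMDGPU::AV_MOV_B64_IMM_PSEUDO);
    if (TII->isImmOperandLegal(MovDesc, /*OpNo=*/1, ImmVal)) {
      // The 64-bit immediate is a legal operand for the pseudo.
    }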
This is in preparation for patches which will introduce more copies to AV registers.
Force-pushed 8961a0c to 0582195
Force-pushed 9690466 to 20442f6
Base automatically changed from users/arsenm/amdgpu/refactor-isImmOperandLegal to main on September 3, 2025 00:06
Contributor: any fixes? https://lab.llvm.org/buildbot/#/builders/187/builds/10892
