Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Aug 27, 2025

This is in preparation for patches which will introduce more
copies to av registers.

@llvmbot
Copy link
Member

llvmbot commented Aug 27, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

This is in preparation for patches which will introduce more
copies to av registers.


Full diff: https://github.com/llvm/llvm-project/pull/155615.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+5-5)
  • (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+19-6)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+1-5)
  • (modified) llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir (+34-51)
  • (modified) llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir (+11-15)
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 268b153c6c924..150e05b59c29f 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -237,16 +237,16 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_AC_FP32,
   OPERAND_REG_INLINE_AC_FP64,
 
+  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
+  // constants. Does not accept registers.
+  OPERAND_INLINE_C_AV64_PSEUDO,
+
   // Operand for source modifiers for VOP instructions
   OPERAND_INPUT_MODS,
 
   // Operand for SDWA instructions
   OPERAND_SDWA_VOPC_DST,
 
-  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
-  // constants.
-  OPERAND_INLINE_C_AV64_PSEUDO,
-
   OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
 
@@ -254,7 +254,7 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64,
 
   OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32,
-  OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_FP64,
+  OPERAND_REG_INLINE_AC_LAST = OPERAND_INLINE_C_AV64_PSEUDO,
 
   OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a116b57c85a88..92eaa8b29ccb8 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1296,7 +1296,8 @@ void SIFoldOperandsImpl::foldOperand(
     for (unsigned MovOp :
          {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
           AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
-          AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {
+          AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
+          AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
       const MCInstrDesc &MovDesc = TII->get(MovOp);
       assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
 
@@ -1312,11 +1313,23 @@ void SIFoldOperandsImpl::foldOperand(
       const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
       const TargetRegisterClass *MovSrcRC =
           TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);
-
-      if (UseSubReg)
-        MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
-      if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
-        break;
+      if (MovSrcRC) {
+        if (UseSubReg)
+          MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
+        if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
+          break;
+
+        // FIXME: This is mutating the instruction only and deferring the actual
+        // fold of the immediate
+      } else {
+        // For the _IMM_PSEUDO cases, there can be value restrictions on the
+        // immediate to verify. Technically we should always verify this, but it
+        // only matters for these concrete cases.
+        // TODO: Handle non-imm case if it's useful.
+        if (!OpToFold.isImm() ||
+            !TII->isImmOperandLegal(MovDesc, 1, *OpToFold.getEffectiveImmVal()))
+          break;
+      }
 
       MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
       MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 47236e9d49f8c..c0b994432948b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3444,12 +3444,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
   case AMDGPU::V_ACCVGPR_READ_B32_e64:
   case AMDGPU::V_ACCVGPR_MOV_B32:
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
-    return true;
   case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
-    // TODO: We could fold this, but it's a strange case. The immediate value
-    // can't be directly folded into any real use. We would have to spread new
-    // immediate legality checks around and only accept subregister extracts for
-    // profitability.
+    return true;
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
index 6f2e33900a79a..73cdcddbef135 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
@@ -7,9 +7,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_areg_64
-    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]]
-    ; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: $agpr0_agpr1 = COPY [[AV_MOV_]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
     %1:areg_64_align2 = COPY %0
@@ -24,9 +23,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: v_mov_b64_pseudo_imm_neg1_copy_to_areg_64
-    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]]
-    ; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -1, implicit $exec
+    ; GCN-NEXT: $agpr0_agpr1 = COPY [[AV_MOV_]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec
     %1:areg_64_align2 = COPY %0
@@ -125,9 +123,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_av_64
-    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[V_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -226,9 +223,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -241,9 +237,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -256,9 +251,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -271,9 +265,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -286,9 +279,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:av_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -301,9 +293,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -316,9 +307,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:av_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -331,9 +321,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -346,9 +335,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -361,9 +349,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -376,9 +363,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -391,9 +377,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -406,9 +391,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:av_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -421,9 +405,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64_align2
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index ddf2aa34ecd87..dfcf9a1f5c5ae 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -816,9 +816,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_agpr
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -832,9 +831,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_0
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
     %1:vreg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -848,9 +846,9 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:vreg_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO [[AV_MOV_]], implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
     %1:vreg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -863,9 +861,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value_copy_sub0
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]].sub0
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 17, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
     %1:vgpr_32 = COPY %0.sub0
     S_ENDPGM 0, implicit %1
@@ -878,9 +875,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value_copy_sub1
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]].sub1
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 64, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
     %1:vgpr_32 = COPY %0.sub1
     S_ENDPGM 0, implicit %1

@arsenm arsenm force-pushed the users/arsenm/amdgpu/fold-s-mov-imm-64-into-av-copy branch from e6418df to 49d1410 Compare August 27, 2025 13:39
@arsenm arsenm force-pushed the users/arsenm/amdgpu/refactor-isImmOperandLegal branch from 6aaa543 to e08813a Compare August 27, 2025 13:39
@arsenm arsenm force-pushed the users/arsenm/amdgpu/fold-s-mov-imm-64-into-av-copy branch from 49d1410 to bc2070b Compare September 2, 2025 14:26
@arsenm arsenm force-pushed the users/arsenm/amdgpu/refactor-isImmOperandLegal branch 2 times, most recently from 2ee13ab to 9690466 Compare September 2, 2025 15:38
@arsenm arsenm force-pushed the users/arsenm/amdgpu/fold-s-mov-imm-64-into-av-copy branch from bc2070b to 8961a0c Compare September 2, 2025 15:38
This loop over all the operands in the MachineInstr will eventually
go past the end of the MCInstrDesc's explicit operands. We don't
need the instr desc to compute the constant bus usage, just the
register and whether it's implicit or not. The check here is slightly
conservative. e.g. a random vcc implicit use appended to an instruction
will falsely report a constant bus use.
The goal is to expose more variants that can operate without
preconstructed MachineInstrs or MachineOperands.
This is in preparation for patches which will introduce more
copies to av registers.
@arsenm arsenm force-pushed the users/arsenm/amdgpu/fold-s-mov-imm-64-into-av-copy branch from 8961a0c to 0582195 Compare September 2, 2025 16:49
@arsenm arsenm force-pushed the users/arsenm/amdgpu/refactor-isImmOperandLegal branch from 9690466 to 20442f6 Compare September 2, 2025 16:49
Base automatically changed from users/arsenm/amdgpu/refactor-isImmOperandLegal to main September 3, 2025 00:06
@arsenm arsenm merged commit dd5eb46 into main Sep 3, 2025
12 of 15 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/fold-s-mov-imm-64-into-av-copy branch September 3, 2025 00:30
@vvereschaka
Copy link
Contributor

any fixes?

https://lab.llvm.org/buildbot/#/builders/187/builds/10892

******************** TEST 'LLVM :: CodeGen/AMDGPU/fold-imm-copy-agpr.mir' FAILED ********************
Exit Code: 2
Command Output (stderr):
--
/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-fold-operands /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir -o - | /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/FileCheck -check-prefix=GCN /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir # RUN: at line 2
+ /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-fold-operands /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir -o -
+ /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/FileCheck -check-prefix=GCN /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
# After SI Fold Operands
# Machine code for function s_mov_b64_imm_0_copy_to_areg_64: IsSSA, NoPHIs, TracksLiveness
bb.0:
  %1:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
  S_ENDPGM 0, implicit %1:areg_64
# End machine code for function s_mov_b64_imm_0_copy_to_areg_64.
*** Bad machine code: Subtarget requires even aligned vector registers ***
- function:    s_mov_b64_imm_0_copy_to_areg_64
- basic block: %bb.0  (0x5cd23aa6d5b8)
- instruction: %1:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
*** Bad machine code: Illegal virtual register for instruction ***
- function:    s_mov_b64_imm_0_copy_to_areg_64
- basic block: %bb.0  (0x5cd23aa6d5b8)
- instruction: %1:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
- operand 0:   %1:areg_64
Expected a AV_64_Align2 register, but got a AReg_64 register
LLVM ERROR: Found 2 machine code errors.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-fold-operands /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir -o -
1.	Running pass 'Function Pass Manager' on module '/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir'.
2.	Running pass 'Verify generated machine code' on function '@s_mov_b64_imm_0_copy_to_areg_64'
 #0 0x00005cd2348ee4c8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x78764c8)
 #1 0x00005cd2348ebbd5 llvm::sys::RunSignalHandlers() (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x7873bd5)
 #2 0x00005cd2348ef291 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007fc865c45330 (/lib/x86_64-linux-gnu/libc.so.6+0x45330)
 #4 0x00007fc865c9eb2c pthread_kill (/lib/x86_64-linux-gnu/libc.so.6+0x9eb2c)
 #5 0x00007fc865c4527e raise (/lib/x86_64-linux-gnu/libc.so.6+0x4527e)
 #6 0x00007fc865c288ff abort (/lib/x86_64-linux-gnu/libc.so.6+0x288ff)
 #7 0x00005cd234854365 llvm::report_fatal_error(llvm::Twine const&, bool) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x77dc365)
 #8 0x00005cd233a3222e (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x69ba22e)
 #9 0x00005cd233a3320b (anonymous namespace)::MachineVerifierLegacyPass::runOnMachineFunction(llvm::MachineFunction&) MachineVerifier.cpp:0:0
#10 0x00005cd2339101f3 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x68981f3)
#11 0x00005cd233e691cb llvm::FPPassManager::runOnFunction(llvm::Function&) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x6df11cb)
#12 0x00005cd233e71262 llvm::FPPassManager::runOnModule(llvm::Module&) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x6df9262)
#13 0x00005cd233e69cea llvm::legacy::PassManagerImpl::run(llvm::Module&) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x6df1cea)
#14 0x00005cd231873519 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#15 0x00005cd231870b5d main (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x47f8b5d)
#16 0x00007fc865c2a1ca (/lib/x86_64-linux-gnu/libc.so.6+0x2a1ca)
#17 0x00007fc865c2a28b __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2a28b)
#18 0x00005cd23186caa5 _start (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x47f4aa5)
FileCheck error: '<stdin>' is empty.
FileCheck command line:  /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/FileCheck -check-prefix=GCN /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
--
********************

@arsenm
Copy link
Contributor Author

arsenm commented Sep 4, 2025

any fixes?

dc170c7

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

6 participants