Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Aug 27, 2025

This is in preparation for patches which will introduce more
copies to av registers.

@llvmbot
Copy link
Member

llvmbot commented Aug 27, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

This is in preparation for patches which will introduce more
copies to av registers.


Full diff: https://github.com/llvm/llvm-project/pull/155615.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+5-5)
  • (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+19-6)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+1-5)
  • (modified) llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir (+34-51)
  • (modified) llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir (+11-15)
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 268b153c6c924..150e05b59c29f 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -237,16 +237,16 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_AC_FP32,
   OPERAND_REG_INLINE_AC_FP64,
 
+  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
+  // constants. Does not accept registers.
+  OPERAND_INLINE_C_AV64_PSEUDO,
+
   // Operand for source modifiers for VOP instructions
   OPERAND_INPUT_MODS,
 
   // Operand for SDWA instructions
   OPERAND_SDWA_VOPC_DST,
 
-  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
-  // constants.
-  OPERAND_INLINE_C_AV64_PSEUDO,
-
   OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
 
@@ -254,7 +254,7 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64,
 
   OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32,
-  OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_FP64,
+  OPERAND_REG_INLINE_AC_LAST = OPERAND_INLINE_C_AV64_PSEUDO,
 
   OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a116b57c85a88..92eaa8b29ccb8 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1296,7 +1296,8 @@ void SIFoldOperandsImpl::foldOperand(
     for (unsigned MovOp :
          {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
           AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
-          AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {
+          AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
+          AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
       const MCInstrDesc &MovDesc = TII->get(MovOp);
       assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
 
@@ -1312,11 +1313,23 @@ void SIFoldOperandsImpl::foldOperand(
       const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
       const TargetRegisterClass *MovSrcRC =
           TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);
-
-      if (UseSubReg)
-        MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
-      if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
-        break;
+      if (MovSrcRC) {
+        if (UseSubReg)
+          MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
+        if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
+          break;
+
+        // FIXME: This is mutating the instruction only and deferring the actual
+        // fold of the immediate
+      } else {
+        // For the _IMM_PSEUDO cases, there can be value restrictions on the
+        // immediate to verify. Technically we should always verify this, but it
+        // only matters for these concrete cases.
+        // TODO: Handle non-imm case if it's useful.
+        if (!OpToFold.isImm() ||
+            !TII->isImmOperandLegal(MovDesc, 1, *OpToFold.getEffectiveImmVal()))
+          break;
+      }
 
       MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
       MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 47236e9d49f8c..c0b994432948b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3444,12 +3444,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
   case AMDGPU::V_ACCVGPR_READ_B32_e64:
   case AMDGPU::V_ACCVGPR_MOV_B32:
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
-    return true;
   case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
-    // TODO: We could fold this, but it's a strange case. The immediate value
-    // can't be directly folded into any real use. We would have to spread new
-    // immediate legality checks around and only accept subregister extracts for
-    // profitability.
+    return true;
   default:
     return false;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
index 6f2e33900a79a..73cdcddbef135 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
@@ -7,9 +7,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_areg_64
-    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]]
-    ; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: $agpr0_agpr1 = COPY [[AV_MOV_]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
     %1:areg_64_align2 = COPY %0
@@ -24,9 +23,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: v_mov_b64_pseudo_imm_neg1_copy_to_areg_64
-    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[V_MOV_B]]
-    ; GCN-NEXT: $agpr0_agpr1 = COPY [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -1, implicit $exec
+    ; GCN-NEXT: $agpr0_agpr1 = COPY [[AV_MOV_]]
     ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO -1, implicit $exec
     %1:areg_64_align2 = COPY %0
@@ -125,9 +123,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: v_mov_b64_pseudo_imm_0_copy_to_av_64
-    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[V_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -226,9 +223,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -241,9 +237,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -256,9 +251,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -271,9 +265,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -286,9 +279,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:av_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -301,9 +293,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_0_copy_to_av_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 0, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -316,9 +307,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:av_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -331,9 +321,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_neg16_copy_to_av_64_align2
-    ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -16, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B64_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO -16, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64 -16, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -346,9 +335,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -361,9 +349,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -376,9 +363,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -391,9 +377,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_inlineimm_32_halves_copy_to_areg_64_align2
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64_align2 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64_align2 = AV_MOV_B64_IMM_PSEUDO -21474836480, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744052234715136, implicit $exec
     %1:areg_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -406,9 +391,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:av_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -421,9 +405,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: s_mov_b64_imm_pseudo_literal_32_halves_copy_to_av_64_align2
-    ; GCN: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_64_align2 = COPY [[S_MOV_B]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64_align2 = AV_MOV_B64_IMM_PSEUDO -42949672960, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 18446744030759878656, implicit $exec
     %1:av_64_align2 = COPY %0
     S_ENDPGM 0, implicit %1
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index ddf2aa34ecd87..dfcf9a1f5c5ae 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -816,9 +816,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_agpr
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_64 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
     %1:areg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -832,9 +831,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_0
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
     %1:vreg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -848,9 +846,9 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[AV_MOV_:%[0-9]+]]:vreg_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO [[AV_MOV_]], implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
     %1:vreg_64 = COPY %0
     S_ENDPGM 0, implicit %1
@@ -863,9 +861,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value_copy_sub0
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]].sub0
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 17, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
     %1:vgpr_32 = COPY %0.sub0
     S_ENDPGM 0, implicit %1
@@ -878,9 +875,8 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; GCN-LABEL: name: av_mov_b64_imm_pseudo_copy_av_64_to_virtreg_vgpr_nonsplat_value_copy_sub1
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]].sub1
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 64, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
     %0:av_64 = AV_MOV_B64_IMM_PSEUDO 274877906961, implicit $exec
     %1:vgpr_32 = COPY %0.sub1
     S_ENDPGM 0, implicit %1

@arsenm arsenm force-pushed the users/arsenm/amdgpu/fold-s-mov-imm-64-into-av-copy branch from e6418df to 49d1410 Compare August 27, 2025 13:39
@arsenm arsenm force-pushed the users/arsenm/amdgpu/refactor-isImmOperandLegal branch from 6aaa543 to e08813a Compare August 27, 2025 13:39
@arsenm arsenm force-pushed the users/arsenm/amdgpu/fold-s-mov-imm-64-into-av-copy branch from 49d1410 to bc2070b Compare September 2, 2025 14:26
@arsenm arsenm force-pushed the users/arsenm/amdgpu/refactor-isImmOperandLegal branch 2 times, most recently from 2ee13ab to 9690466 Compare September 2, 2025 15:38
@arsenm arsenm force-pushed the users/arsenm/amdgpu/fold-s-mov-imm-64-into-av-copy branch from bc2070b to 8961a0c Compare September 2, 2025 15:38
This loop over all the operands in the MachineInstr will eventually
go past the end of the MCInstrDesc's explicit operands. We don't
need the instr desc to compute the constant bus usage, just the
register and whether it's implicit or not. The check here is slightly
conservative. e.g. a random vcc implicit use appended to an instruction
will falsely report a constant bus use.
The goal is to expose more variants that can operate without
preconstructed MachineInstrs or MachineOperands.
This is in preparation for patches which will introduce more
copies to av registers.
@arsenm arsenm force-pushed the users/arsenm/amdgpu/fold-s-mov-imm-64-into-av-copy branch from 8961a0c to 0582195 Compare September 2, 2025 16:49
@arsenm arsenm force-pushed the users/arsenm/amdgpu/refactor-isImmOperandLegal branch from 9690466 to 20442f6 Compare September 2, 2025 16:49
Base automatically changed from users/arsenm/amdgpu/refactor-isImmOperandLegal to main September 3, 2025 00:06
@arsenm arsenm merged commit dd5eb46 into main Sep 3, 2025
12 of 15 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/fold-s-mov-imm-64-into-av-copy branch September 3, 2025 00:30
@vvereschaka
Copy link
Contributor

any fixes?

https://lab.llvm.org/buildbot/#/builders/187/builds/10892

******************** TEST 'LLVM :: CodeGen/AMDGPU/fold-imm-copy-agpr.mir' FAILED ********************
Exit Code: 2
Command Output (stderr):
--
/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-fold-operands /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir -o - | /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/FileCheck -check-prefix=GCN /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir # RUN: at line 2
+ /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-fold-operands /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir -o -
+ /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/FileCheck -check-prefix=GCN /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
# After SI Fold Operands
# Machine code for function s_mov_b64_imm_0_copy_to_areg_64: IsSSA, NoPHIs, TracksLiveness
bb.0:
  %1:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
  S_ENDPGM 0, implicit %1:areg_64
# End machine code for function s_mov_b64_imm_0_copy_to_areg_64.
*** Bad machine code: Subtarget requires even aligned vector registers ***
- function:    s_mov_b64_imm_0_copy_to_areg_64
- basic block: %bb.0  (0x5cd23aa6d5b8)
- instruction: %1:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
*** Bad machine code: Illegal virtual register for instruction ***
- function:    s_mov_b64_imm_0_copy_to_areg_64
- basic block: %bb.0  (0x5cd23aa6d5b8)
- instruction: %1:areg_64 = AV_MOV_B64_IMM_PSEUDO 0, implicit $exec
- operand 0:   %1:areg_64
Expected a AV_64_Align2 register, but got a AReg_64 register
LLVM ERROR: Found 2 machine code errors.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-fold-operands /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir -o -
1.	Running pass 'Function Pass Manager' on module '/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir'.
2.	Running pass 'Verify generated machine code' on function '@s_mov_b64_imm_0_copy_to_areg_64'
 #0 0x00005cd2348ee4c8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x78764c8)
 #1 0x00005cd2348ebbd5 llvm::sys::RunSignalHandlers() (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x7873bd5)
 #2 0x00005cd2348ef291 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007fc865c45330 (/lib/x86_64-linux-gnu/libc.so.6+0x45330)
 #4 0x00007fc865c9eb2c pthread_kill (/lib/x86_64-linux-gnu/libc.so.6+0x9eb2c)
 #5 0x00007fc865c4527e raise (/lib/x86_64-linux-gnu/libc.so.6+0x4527e)
 #6 0x00007fc865c288ff abort (/lib/x86_64-linux-gnu/libc.so.6+0x288ff)
 #7 0x00005cd234854365 llvm::report_fatal_error(llvm::Twine const&, bool) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x77dc365)
 #8 0x00005cd233a3222e (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x69ba22e)
 #9 0x00005cd233a3320b (anonymous namespace)::MachineVerifierLegacyPass::runOnMachineFunction(llvm::MachineFunction&) MachineVerifier.cpp:0:0
#10 0x00005cd2339101f3 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x68981f3)
#11 0x00005cd233e691cb llvm::FPPassManager::runOnFunction(llvm::Function&) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x6df11cb)
#12 0x00005cd233e71262 llvm::FPPassManager::runOnModule(llvm::Module&) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x6df9262)
#13 0x00005cd233e69cea llvm::legacy::PassManagerImpl::run(llvm::Module&) (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x6df1cea)
#14 0x00005cd231873519 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#15 0x00005cd231870b5d main (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x47f8b5d)
#16 0x00007fc865c2a1ca (/lib/x86_64-linux-gnu/libc.so.6+0x2a1ca)
#17 0x00007fc865c2a28b __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2a28b)
#18 0x00005cd23186caa5 _start (/home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/llc+0x47f4aa5)
FileCheck error: '<stdin>' is empty.
FileCheck command line:  /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/bin/FileCheck -check-prefix=GCN /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir
--
********************

@arsenm
Copy link
Contributor Author

arsenm commented Sep 4, 2025

any fixes?

dc170c7

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

6 participants