Skip to content

Commit 921341f

Browse files
committed
fix v_mov_b16_t16 indexing
1 parent 8f2466b commit 921341f

File tree

4 files changed

+55
-1
lines changed

4 files changed

+55
-1
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -931,7 +931,9 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
931931
for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg);
932932
SubDef && TII.isFoldableCopy(*SubDef);
933933
SubDef = MRI.getVRegDef(Sub->getReg())) {
934-
MachineOperand &SrcOp = SubDef->getOperand(1);
934+
unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);
935+
MachineOperand &SrcOp = SubDef->getOperand(SrcIdx);
936+
935937
if (SrcOp.isImm())
936938
return &SrcOp;
937939
if (!SrcOp.isReg() || SrcOp.getReg().isPhysical())

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3435,6 +3435,32 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
34353435
}
34363436
}
34373437

3438+
/// Returns the index of the operand of \p MI that holds the value being
/// copied, for the move/copy opcodes listed below.
///
/// The result is meant to be passed to MI.getOperand(); the opcode alone
/// decides the answer:
///   - 2 for V_MOV_B16_t16_e32 / V_MOV_B16_t16_e64,
///   - 1 for every other opcode enumerated in the switch.
///
/// Any opcode that is not listed reaches llvm_unreachable, so callers are
/// expected to pass only instructions that are foldable copies.
unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) {
3439+
switch (MI.getOpcode()) {
3440+
case AMDGPU::V_MOV_B16_t16_e32:
3441+
case AMDGPU::V_MOV_B16_t16_e64:
3442+
// NOTE(review): presumably operand 1 of the true16 moves is a source
// modifiers operand, which pushes the real source to index 2 -- confirm
// against the instruction definitions (in particular for the e32 form).
return 2;
3443+
case AMDGPU::V_MOV_B32_e32:
3444+
case AMDGPU::V_MOV_B32_e64:
3445+
case AMDGPU::V_MOV_B64_PSEUDO:
3446+
case AMDGPU::V_MOV_B64_e32:
3447+
case AMDGPU::V_MOV_B64_e64:
3448+
case AMDGPU::S_MOV_B32:
3449+
case AMDGPU::S_MOV_B64:
3450+
case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3451+
case AMDGPU::COPY:
3452+
case AMDGPU::WWM_COPY:
3453+
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3454+
case AMDGPU::V_ACCVGPR_READ_B32_e64:
3455+
case AMDGPU::V_ACCVGPR_MOV_B32:
3456+
case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3457+
case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3458+
// For all of the opcodes above the source is the operand at index 1.
return 1;
3459+
default:
3460+
// Not one of the copy-like opcodes handled above: treated as a caller bug.
llvm_unreachable("MI is not a foldable copy");
3461+
}
3462+
}
3463+
34383464
static constexpr AMDGPU::OpName ModifierOpNames[] = {
34393465
AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
34403466
AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
417417
const MachineInstr &MIb) const override;
418418

419419
static bool isFoldableCopy(const MachineInstr &MI);
420+
/// Returns the operand index of the copied source value of \p MI, suitable
/// for MI.getOperand(). The implementation only handles a fixed list of
/// copy/move opcodes and reports any other opcode as unreachable
/// ("MI is not a foldable copy"), so it should be called only on
/// instructions that are foldable copies.
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
420421

421422
void removeModOperands(MachineInstr &MI) const;
422423

llvm/test/CodeGen/AMDGPU/true16-fold.mir

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ body: |
5757
%4:vgpr_16 = COPY %3:sgpr_lo16
5858
%5:vgpr_32 = V_ALIGNBIT_B32_t16_e64 0, %0:sreg_32, 0, killed %1:sreg_32, 0, killed %4:vgpr_16, 0, 0, implicit $exec
5959
S_ENDPGM 0, implicit %5
60+
...
6061

6162
---
6263
name: fold_16bit_madmix_clamp
@@ -207,3 +208,27 @@ body: |
207208
$vgpr0 = COPY %4
208209
S_ENDPGM 0, implicit $vgpr0
209210
...
211+
212+
---
# Two V_MOV_B16_t16_e64 instructions materialize the immediate -1 into
# 16-bit registers, which a REG_SEQUENCE combines (lo16 / hi16) into a
# 32-bit register used as both sources of V_MAX_F32_e64. The CHECK lines
# expect the V_MAX_F32_e64 sources to be rewritten to the immediate -1,
# while the moves and the REG_SEQUENCE remain in the output.
213+
name: fold_imm16_across_reg_sequence
214+
tracksRegLiveness: true
215+
registers:
216+
body: |
217+
bb.0:
218+
liveins: $vgpr0, $vgpr1, $vgpr2
219+
; CHECK-LABEL: name: fold_imm16_across_reg_sequence
220+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
221+
; CHECK-NEXT: {{ $}}
222+
; CHECK-NEXT: [[V_MOV_B16_t16_e64_:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
223+
; CHECK-NEXT: [[V_MOV_B16_t16_e64_1:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
224+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_MOV_B16_t16_e64_]], %subreg.lo16, [[V_MOV_B16_t16_e64_1]], %subreg.hi16
225+
; CHECK-NEXT: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e64 0, -1, 0, -1, 0, 0, implicit $mode, implicit $exec
226+
; CHECK-NEXT: $vgpr0 = COPY [[V_MAX_F32_e64_]]
227+
; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
228+
%0:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
229+
%1:vgpr_16 = V_MOV_B16_t16_e64 0, -1, 0, implicit $exec
230+
%2:vgpr_32 = REG_SEQUENCE %0, %subreg.lo16, %1, %subreg.hi16
231+
%3:vgpr_32 = nofpexcept V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
232+
$vgpr0 = COPY %3
233+
S_ENDPGM 0, implicit $vgpr0
234+
...

0 commit comments

Comments
 (0)