Skip to content

Commit 8cf32f0

Browse files
committed
[AMDGPU] Merge 8/16-bit tbuffer 3-comp into 4-comp (XYZ)
1 parent 15a9d34 commit 8cf32f0

File tree

2 files changed

+79
-49
lines changed

2 files changed

+79
-49
lines changed

llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,9 +1068,25 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
10681068
Info0->NumFormat != Info1->NumFormat)
10691069
return false;
10701070

1071+
// For 8-bit or 16-bit formats there is no 3-component variant.
1072+
// If NumCombinedComponents is 3, try the 4-component format and use XYZ.
1073+
// Example:
1074+
// tbuffer_load_format_x + tbuffer_load_format_x + tbuffer_load_format_x
1075+
// ==> tbuffer_load_format_xyz with format:[BUF_FMT_16_16_16_16_SNORM]
10711076
unsigned NumCombinedComponents = CI.Width + Paired.Width;
1072-
if (getBufferFormatWithCompCount(CI.Format, NumCombinedComponents, STI) ==
1073-
0)
1077+
unsigned CombinedBufferFormat =
1078+
getBufferFormatWithCompCount(CI.Format, NumCombinedComponents, STI);
1079+
if (CombinedBufferFormat == 0 && NumCombinedComponents == 3) {
1080+
if (Info0->BitsPerComp == 8 || Info0->BitsPerComp == 16) {
1081+
unsigned TryFormat = getBufferFormatWithCompCount(CI.Format, 4, STI);
1082+
if (!TryFormat)
1083+
return false;
1084+
CombinedBufferFormat = TryFormat;
1085+
NumCombinedComponents = 4;
1086+
}
1087+
}
1088+
1089+
if (CombinedBufferFormat == 0)
10741090
return false;
10751091

10761092
// Merge only when the two access ranges are strictly back-to-back,
@@ -1102,7 +1118,7 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
11021118
// Handle all non-DS instructions.
11031119
if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
11041120
if (EltOffset0 + CI.Width != EltOffset1 &&
1105-
EltOffset1 + Paired.Width != EltOffset0)
1121+
EltOffset1 + Paired.Width != EltOffset0)
11061122
return false;
11071123
// Instructions with scale_offset modifier cannot be combined unless we
11081124
// also generate code to scale the offset and reset that bit.
@@ -1621,8 +1637,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
16211637
if (Regs.VAddr)
16221638
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
16231639

1640+
// For 8- or 16-bit tbuffer formats there is no 3-component encoding.
1641+
// If the combined count is 3 (e.g. X+X+X or XY+X), promote to 4 components
1642+
// and use XYZ of XYZW to enable the merge.
1643+
unsigned NumCombinedComponents = CI.Width + Paired.Width;
1644+
if (NumCombinedComponents == 3 && (CI.EltSize == 1 || CI.EltSize == 2))
1645+
NumCombinedComponents = 4;
16241646
unsigned JoinedFormat =
1625-
getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STM);
1647+
getBufferFormatWithCompCount(CI.Format, NumCombinedComponents, *STM);
16261648

16271649
// It shouldn't be possible to get this far if the two instructions
16281650
// don't have a single memoperand, because MachineInstr::mayAlias()
@@ -1664,8 +1686,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
16641686
if (Regs.VAddr)
16651687
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
16661688

1689+
// For 8- or 16-bit tbuffer formats there is no 3-component encoding.
1690+
// If the combined count is 3 (e.g. X+X+X or XY+X), promote to 4 components
1691+
// and use XYZ of XYZW to enable the merge.
1692+
unsigned NumCombinedComponents = CI.Width + Paired.Width;
1693+
if (NumCombinedComponents == 3 && (CI.EltSize == 1 || CI.EltSize == 2))
1694+
NumCombinedComponents = 4;
16671695
unsigned JoinedFormat =
1668-
getBufferFormatWithCompCount(CI.Format, CI.Width + Paired.Width, *STM);
1696+
getBufferFormatWithCompCount(CI.Format, NumCombinedComponents, *STM);
16691697

16701698
// It shouldn't be possible to get this far if the two instructions
16711699
// don't have a single memoperand, because MachineInstr::mayAlias()

llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir

Lines changed: 46 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,10 +1536,11 @@ body: |
15361536
; GFX11-NEXT: {{ $}}
15371537
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
15381538
; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr1, %subreg.sub1, $sgpr2, %subreg.sub2, $sgpr3, %subreg.sub3
1539-
; GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XY_IDXEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_IDXEN [[COPY]], %rsrc, 0, 0, 29, 0, 0, implicit $exec :: (dereferenceable load (s32), align 2, addrspace 8)
1540-
; GFX11-NEXT: %x0:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_IDXEN]].sub0
1541-
; GFX11-NEXT: %x1:vgpr_32 = COPY killed [[TBUFFER_LOAD_FORMAT_XY_IDXEN]].sub1
1542-
; GFX11-NEXT: %x2:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY]], %rsrc, 0, 4, 13, 0, 0, implicit $exec :: (dereferenceable load (s16), addrspace 8)
1539+
; GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_IDXEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_IDXEN [[COPY]], %rsrc, 0, 0, 57, 0, 0, implicit $exec :: (dereferenceable load (s48), align 2, addrspace 8)
1540+
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_IDXEN]].sub0_sub1
1541+
; GFX11-NEXT: %x2:vgpr_32 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZ_IDXEN]].sub2
1542+
; GFX11-NEXT: %x0:vgpr_32 = COPY [[COPY1]].sub0
1543+
; GFX11-NEXT: %x1:vgpr_32 = COPY killed [[COPY1]].sub1
15431544
%0:vgpr_32 = COPY $vgpr0
15441545
%rsrc:sgpr_128 = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr1,%subreg.sub1, $sgpr2, %subreg.sub2, $sgpr3, %subreg.sub3
15451546
%x0:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN %0, %rsrc, 0, 0, 13, 0, 0, implicit $exec :: (dereferenceable load (s16),align 2,addrspace 8)
@@ -1563,19 +1564,19 @@ body: |
15631564
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
15641565
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
15651566
; GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY5]], [[REG_SEQUENCE]], 0, 0, 57, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8)
1566-
; GFX11-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub0_sub1
1567-
; GFX11-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub2_sub3
1568-
; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]].sub0
1569-
; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[COPY6]].sub1
1570-
; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
1571-
; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]].sub1
1567+
; GFX11-NEXT: [[COPY6:%[0-9]+]]:vreg_96 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub0_sub1_sub2
1568+
; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub3
1569+
; GFX11-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[COPY6]].sub0_sub1
1570+
; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[COPY6]].sub2
1571+
; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY8]].sub0
1572+
; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY8]].sub1
15721573
; GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY5]], [[REG_SEQUENCE]], 0, 16, 57, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8)
1573-
; GFX11-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN1]].sub0_sub1
1574-
; GFX11-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN1]].sub2_sub3
1575-
; GFX11-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY12]].sub0
1576-
; GFX11-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed [[COPY12]].sub1
1577-
; GFX11-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY13]].sub0
1578-
; GFX11-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY killed [[COPY13]].sub1
1574+
; GFX11-NEXT: [[COPY12:%[0-9]+]]:vreg_96 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN1]].sub0_sub1_sub2
1575+
; GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN1]].sub3
1576+
; GFX11-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[COPY12]].sub0_sub1
1577+
; GFX11-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed [[COPY12]].sub2
1578+
; GFX11-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY14]].sub0
1579+
; GFX11-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY killed [[COPY14]].sub1
15791580
; GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY5]], [[REG_SEQUENCE]], 0, 24, 13, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
15801581
%4:sgpr_32 = COPY $sgpr4
15811582
%3:sgpr_32 = COPY $sgpr3
@@ -1678,8 +1679,8 @@ body: |
16781679
; GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
16791680
; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
16801681
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
1681-
; GFX11-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE]], %rsrc, 0, 0, 29, 0, 0, implicit $exec :: (store (s32), align 2, addrspace 4)
1682-
; GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY2]], %rsrc, 0, 4, 13, 0, 0, implicit $exec :: (store (s16), addrspace 4)
1682+
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2
1683+
; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE1]], %rsrc, 0, 0, 57, 0, 0, implicit $exec :: (store (s48), align 2, addrspace 4)
16831684
%4:vgpr_32 = COPY $vgpr0
16841685
%5:vgpr_32 = COPY $vgpr1
16851686
%6:vgpr_32 = COPY $vgpr2
@@ -1716,12 +1717,12 @@ body: |
17161717
; GFX11-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0
17171718
; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY9]], %subreg.sub3
17181719
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
1719-
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
1720-
; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, killed [[REG_SEQUENCE1]], %subreg.sub2_sub3
1720+
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY6]], %subreg.sub2
1721+
; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[COPY5]], %subreg.sub3
17211722
; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE2]], %rsrc, 0, 0, 57, 0, 0, implicit $exec :: (store (s64), align 2, addrspace 4)
17221723
; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
1723-
; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
1724-
; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1, killed [[REG_SEQUENCE4]], %subreg.sub2_sub3
1724+
; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2
1725+
; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE4]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3
17251726
; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE5]], %rsrc, 0, 8, 57, 0, 0, implicit $exec :: (store (s64), align 2, addrspace 4)
17261727
; GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], %rsrc, 0, 16, 13, 0, 0, implicit $exec :: (store (s16), addrspace 4)
17271728
%12:vgpr_32 = COPY $vgpr8
@@ -1804,10 +1805,11 @@ body: |
18041805
; GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
18051806
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
18061807
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
1807-
; GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XY_IDXEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_IDXEN [[COPY5]], [[REG_SEQUENCE]], 0, 0, 18, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
1808-
; GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_IDXEN]].sub0
1809-
; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[TBUFFER_LOAD_FORMAT_XY_IDXEN]].sub1
1810-
; GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY5]], [[REG_SEQUENCE]], 0, 2, 5, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
1808+
; GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_IDXEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_IDXEN [[COPY5]], [[REG_SEQUENCE]], 0, 0, 46, 0, 0, implicit $exec :: (dereferenceable load (s24), align 1, addrspace 8)
1809+
; GFX11-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_IDXEN]].sub0_sub1
1810+
; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZ_IDXEN]].sub2
1811+
; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]].sub0
1812+
; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[COPY6]].sub1
18111813
%4:sgpr_32 = COPY $sgpr4
18121814
%3:sgpr_32 = COPY $sgpr3
18131815
%2:sgpr_32 = COPY $sgpr2
@@ -1836,19 +1838,19 @@ body: |
18361838
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
18371839
; GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
18381840
; GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY5]], [[REG_SEQUENCE]], 0, 0, 46, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
1839-
; GFX11-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub0_sub1
1840-
; GFX11-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub2_sub3
1841-
; GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]].sub0
1842-
; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[COPY6]].sub1
1843-
; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
1844-
; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]].sub1
1841+
; GFX11-NEXT: [[COPY6:%[0-9]+]]:vreg_96 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub0_sub1_sub2
1842+
; GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub3
1843+
; GFX11-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[COPY6]].sub0_sub1
1844+
; GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[COPY6]].sub2
1845+
; GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY8]].sub0
1846+
; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed [[COPY8]].sub1
18451847
; GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY5]], [[REG_SEQUENCE]], 0, 4, 46, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
1846-
; GFX11-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN1]].sub0_sub1
1847-
; GFX11-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN1]].sub2_sub3
1848-
; GFX11-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY12]].sub0
1849-
; GFX11-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed [[COPY12]].sub1
1850-
; GFX11-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY13]].sub0
1851-
; GFX11-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY killed [[COPY13]].sub1
1848+
; GFX11-NEXT: [[COPY12:%[0-9]+]]:vreg_96 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN1]].sub0_sub1_sub2
1849+
; GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN1]].sub3
1850+
; GFX11-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[COPY12]].sub0_sub1
1851+
; GFX11-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed [[COPY12]].sub2
1852+
; GFX11-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY14]].sub0
1853+
; GFX11-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY killed [[COPY14]].sub1
18521854
; GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY5]], [[REG_SEQUENCE]], 0, 8, 5, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
18531855
%4:sgpr_32 = COPY $sgpr4
18541856
%3:sgpr_32 = COPY $sgpr3
@@ -1971,8 +1973,8 @@ body: |
19711973
; GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
19721974
; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
19731975
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
1974-
; GFX11-NEXT: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed [[REG_SEQUENCE]], %rsrc, 0, 0, 18, 0, 0, implicit $exec :: (store (s16), align 1, addrspace 4)
1975-
; GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], %rsrc, 0, 2, 5, 0, 0, implicit $exec :: (store (s8), addrspace 4)
1976+
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2
1977+
; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed [[REG_SEQUENCE1]], %rsrc, 0, 0, 46, 0, 0, implicit $exec :: (store (s24), align 1, addrspace 4)
19761978
%6:vgpr_32 = COPY $vgpr2
19771979
%5:vgpr_32 = COPY $vgpr1
19781980
%4:vgpr_32 = COPY $vgpr0
@@ -2009,12 +2011,12 @@ body: |
20092011
; GFX11-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0
20102012
; GFX11-NEXT: %rsrc:sgpr_128 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY10]], %subreg.sub2, [[COPY9]], %subreg.sub3
20112013
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
2012-
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
2013-
; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, killed [[REG_SEQUENCE1]], %subreg.sub2_sub3
2014+
; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[COPY6]], %subreg.sub2
2015+
; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[COPY5]], %subreg.sub3
20142016
; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE2]], %rsrc, 0, 0, 46, 0, 0, implicit $exec :: (store (s32), align 1, addrspace 4)
20152017
; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
2016-
; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
2017-
; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1, killed [[REG_SEQUENCE4]], %subreg.sub2_sub3
2018+
; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_96 = REG_SEQUENCE killed [[REG_SEQUENCE3]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2
2019+
; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[REG_SEQUENCE4]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3
20182020
; GFX11-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[REG_SEQUENCE5]], %rsrc, 0, 4, 46, 0, 0, implicit $exec :: (store (s32), align 1, addrspace 4)
20192021
; GFX11-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], %rsrc, 0, 8, 5, 0, 0, implicit $exec :: (store (s8), addrspace 4)
20202022
%12:vgpr_32 = COPY $vgpr8

0 commit comments

Comments
 (0)