@@ -14,11 +14,9 @@ namespace llvm::AMDGPU {
1414// ===----------------------------------------------------------------------===//
1515// Custom Operands.
1616//
17- // A table of custom operands shall describe "primary" operand names first
18- // followed by aliases if any. It is not required but recommended to arrange
19- // operands so that operand encoding match operand position in the table. This
20- // will make getNameFromOperandTable() a bit more efficient. Unused slots in the
21- // table shall have an empty name.
17+ // A table of custom operands must be sorted by Encoding in ascending order
18+ // so that lookups can use binary search. Among entries that share an encoding,
19+ // list the "primary" operand name first, followed by its aliases, if any.
2220//
2321// ===----------------------------------------------------------------------===//
2422
@@ -27,21 +25,18 @@ template <size_t N>
2725static StringRef getNameFromOperandTable (const CustomOperand (&Table)[N],
2826 unsigned Encoding,
2927 const MCSubtargetInfo &STI) {
30- auto isValidIndexForEncoding = [&](size_t Idx) {
31- return Idx < N && Table[Idx].Encoding == Encoding &&
32- !Table[Idx].Name .empty () &&
33- (!Table[Idx].Cond || Table[Idx].Cond (STI));
34- };
35-
36- // This is an optimization that should work in most cases. As a side effect,
37- // it may cause selection of an alias instead of a primary operand name in
38- // case of sparse tables.
39- if (isValidIndexForEncoding (Encoding))
40- return Table[Encoding].Name ;
41-
42- for (size_t Idx = 0 ; Idx != N; ++Idx)
43- if (isValidIndexForEncoding (Idx))
44- return Table[Idx].Name ;
28+ // Find the first entry with the target encoding
29+ auto First =
30+ std::lower_bound (Table, Table + N, Encoding,
31+ [](const CustomOperand &Entry, unsigned TargetEncoding) {
32+ return Entry.Encoding < TargetEncoding;
33+ });
34+
35+ // Search through entries with the same encoding to find the first valid one
36+ for (auto It = First; It != Table + N && It->Encoding == Encoding; ++It) {
37+ if (It->Encoding == Encoding && (!It->Cond || It->Cond (STI)))
38+ return It->Name ;
39+ }
4540
4641 return " " ;
4742}
@@ -92,21 +87,20 @@ namespace SendMsg {
9287// clang-format off
9388
9489static constexpr CustomOperand MsgOperands[] = {
95- {{" " }},
9690 {{" MSG_INTERRUPT" }, ID_INTERRUPT},
9791 {{" MSG_GS" }, ID_GS_PreGFX11, isNotGFX11Plus},
92+ {{" MSG_HS_TESSFACTOR" }, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus},
9893 {{" MSG_GS_DONE" }, ID_GS_DONE_PreGFX11, isNotGFX11Plus},
94+ {{" MSG_DEALLOC_VGPRS" }, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus},
9995 {{" MSG_SAVEWAVE" }, ID_SAVEWAVE, isGFX8_GFX9_GFX10},
10096 {{" MSG_STALL_WAVE_GEN" }, ID_STALL_WAVE_GEN, isGFX9_GFX10_GFX11},
10197 {{" MSG_HALT_WAVES" }, ID_HALT_WAVES, isGFX9_GFX10_GFX11},
10298 {{" MSG_ORDERED_PS_DONE" }, ID_ORDERED_PS_DONE, isGFX9_GFX10},
10399 {{" MSG_EARLY_PRIM_DEALLOC" }, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10},
104100 {{" MSG_GS_ALLOC_REQ" }, ID_GS_ALLOC_REQ, isGFX9Plus},
105101 {{" MSG_GET_DOORBELL" }, ID_GET_DOORBELL, isGFX9_GFX10},
106- {{" MSG_GET_DDID" }, ID_GET_DDID, isGFX10},
107- {{" MSG_HS_TESSFACTOR" }, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus},
108- {{" MSG_DEALLOC_VGPRS" }, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus},
109102 {{" MSG_SAVEWAVE_HAS_TDM" }, ID_SAVEWAVE_HAS_TDM, isGFX1250},
103+ {{" MSG_GET_DDID" }, ID_GET_DDID, isGFX10},
110104 {{" MSG_SYSMSG" }, ID_SYSMSG},
111105 {{" MSG_RTN_GET_DOORBELL" }, ID_RTN_GET_DOORBELL, isGFX11Plus},
112106 {{" MSG_RTN_GET_DDID" }, ID_RTN_GET_DDID, isGFX11Plus},
@@ -121,7 +115,6 @@ static constexpr CustomOperand MsgOperands[] = {
121115};
122116
123117static constexpr CustomOperand SysMsgOperands[] = {
124- {{" " }},
125118 {{" SYSMSG_OP_ECC_ERR_INTERRUPT" }, OP_SYS_ECC_ERR_INTERRUPT},
126119 {{" SYSMSG_OP_REG_RD" }, OP_SYS_REG_RD},
127120 {{" SYSMSG_OP_HOST_TRAP_ACK" }, OP_SYS_HOST_TRAP_ACK, isNotGFX9Plus},
@@ -169,68 +162,67 @@ namespace Hwreg {
169162// NOLINTBEGIN
170163// clang-format off
171164static constexpr CustomOperand Operands[] = {
172- {{" " }},
173- {{" HW_REG_MODE" }, ID_MODE},
174- {{" HW_REG_STATUS" }, ID_STATUS},
175- {{" HW_REG_TRAPSTS" }, ID_TRAPSTS, isNotGFX12Plus},
176- {{" HW_REG_HW_ID" }, ID_HW_ID, isNotGFX10Plus},
177- {{" HW_REG_GPR_ALLOC" }, ID_GPR_ALLOC},
178- {{" HW_REG_LDS_ALLOC" }, ID_LDS_ALLOC},
179- {{" HW_REG_IB_STS" }, ID_IB_STS},
180- {{" " }},
181- {{" " }},
182- {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus},
183- {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
184- {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
185- {{" " }},
186- {{" " }},
187- {{" HW_REG_SH_MEM_BASES" }, ID_MEM_BASES, isGFX9_GFX10_GFX11},
188- {{" HW_REG_TBA_LO" }, ID_TBA_LO, isGFX9_GFX10},
189- {{" HW_REG_TBA_HI" }, ID_TBA_HI, isGFX9_GFX10},
190- {{" HW_REG_TMA_LO" }, ID_TMA_LO, isGFX9_GFX10},
191- {{" HW_REG_TMA_HI" }, ID_TMA_HI, isGFX9_GFX10},
192- {{" HW_REG_FLAT_SCR_LO" }, ID_FLAT_SCR_LO, isGFX10_GFX11},
193- {{" HW_REG_FLAT_SCR_HI" }, ID_FLAT_SCR_HI, isGFX10_GFX11},
194- {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK, isGFX10Before1030},
195- {{" HW_REG_HW_ID1" }, ID_HW_ID1, isGFX10Plus},
196- {{" HW_REG_HW_ID2" }, ID_HW_ID2, isGFX10Plus},
197- {{" HW_REG_POPS_PACKER" }, ID_POPS_PACKER, isGFX10},
198- {{" " }},
199- {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
200- {{" HW_REG_IB_STS2" }, ID_IB_STS2, isGFX1250},
201- {{" HW_REG_SHADER_CYCLES" }, ID_SHADER_CYCLES, isGFX10_3_GFX11},
202- {{" HW_REG_SHADER_CYCLES_HI" }, ID_SHADER_CYCLES_HI, isGFX12Plus},
203- {{" HW_REG_DVGPR_ALLOC_LO" }, ID_DVGPR_ALLOC_LO, isGFX12Plus},
204- {{" HW_REG_DVGPR_ALLOC_HI" }, ID_DVGPR_ALLOC_HI, isGFX12Plus},
205-
206- // Register numbers reused in GFX11
207- {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
208- {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
209-
210- // Register numbers reused in GFX12+
211- {{" HW_REG_STATE_PRIV" }, ID_STATE_PRIV, isGFX12Plus},
212- {{" HW_REG_PERF_SNAPSHOT_DATA1" }, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
213- {{" HW_REG_PERF_SNAPSHOT_DATA2" }, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
214- {{" HW_REG_EXCP_FLAG_PRIV" }, ID_EXCP_FLAG_PRIV, isGFX12Plus},
215- {{" HW_REG_EXCP_FLAG_USER" }, ID_EXCP_FLAG_USER, isGFX12Plus},
216- {{" HW_REG_TRAP_CTRL" }, ID_TRAP_CTRL, isGFX12Plus},
217- {{" HW_REG_SCRATCH_BASE_LO" }, ID_FLAT_SCR_LO, isGFX12Plus},
218- {{" HW_REG_SCRATCH_BASE_HI" }, ID_FLAT_SCR_HI, isGFX12Plus},
219- {{" HW_REG_SHADER_CYCLES_LO" }, ID_SHADER_CYCLES, isGFX12Plus},
220-
221- // GFX942 specific registers
222- {{" HW_REG_XCC_ID" }, ID_XCC_ID, isGFX940},
223- {{" HW_REG_SQ_PERF_SNAPSHOT_DATA" }, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
224- {{" HW_REG_SQ_PERF_SNAPSHOT_DATA1" }, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
225- {{" HW_REG_SQ_PERF_SNAPSHOT_PC_LO" }, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
226- {{" HW_REG_SQ_PERF_SNAPSHOT_PC_HI" }, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
227-
228- // GFX1250
229- {{" HW_REG_XNACK_STATE_PRIV" }, ID_XNACK_STATE_PRIV, isGFX1250},
230- {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK_gfx1250, isGFX1250},
231-
232- // Aliases
233- {{" HW_REG_HW_ID" }, ID_HW_ID1, isGFX10},
165+ {{" HW_REG_WAVE_MODE" }, ID_MODE, isGFX12Plus},
166+ {{" HW_REG_MODE" }, ID_MODE},
167+ {{" HW_REG_WAVE_STATUS" }, ID_STATUS, isGFX12Plus},
168+ {{" HW_REG_STATUS" }, ID_STATUS},
169+ {{" HW_REG_TRAPSTS" }, ID_TRAPSTS, isNotGFX12Plus},
170+ {{" HW_REG_HW_ID" }, ID_HW_ID, isNotGFX10Plus},
171+ {{" HW_REG_WAVE_STATE_PRIV" }, ID_STATE_PRIV, isGFX12Plus},
172+ {{" HW_REG_STATE_PRIV" }, ID_STATE_PRIV, isGFX12Plus},
173+ {{" HW_REG_WAVE_GPR_ALLOC" }, ID_GPR_ALLOC, isGFX12Plus},
174+ {{" HW_REG_GPR_ALLOC" }, ID_GPR_ALLOC},
175+ {{" HW_REG_WAVE_LDS_ALLOC" }, ID_LDS_ALLOC, isGFX12Plus},
176+ {{" HW_REG_LDS_ALLOC" }, ID_LDS_ALLOC},
177+ {{" HW_REG_IB_STS" }, ID_IB_STS},
178+ {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx12, isGFX12Plus},
179+ {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx12, isGFX12Plus},
180+ {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx12, isGFX12Plus},
181+ {{" HW_REG_SH_MEM_BASES" }, ID_MEM_BASES, isGFX9_GFX10_GFX11},
182+ {{" HW_REG_PERF_SNAPSHOT_DATA1" }, ID_PERF_SNAPSHOT_DATA1, isGFX12Plus},
183+ {{" HW_REG_TBA_LO" }, ID_TBA_LO, isGFX9_GFX10},
184+ {{" HW_REG_PERF_SNAPSHOT_DATA2" }, ID_PERF_SNAPSHOT_DATA2, isGFX12Plus},
185+ {{" HW_REG_TBA_HI" }, ID_TBA_HI, isGFX9_GFX10},
186+ {{" HW_REG_WAVE_EXCP_FLAG_PRIV" }, ID_EXCP_FLAG_PRIV, isGFX12Plus},
187+ {{" HW_REG_EXCP_FLAG_PRIV" }, ID_EXCP_FLAG_PRIV, isGFX12Plus},
188+ {{" HW_REG_TMA_LO" }, ID_TMA_LO, isGFX9_GFX10},
189+ {{" HW_REG_PERF_SNAPSHOT_PC_LO" }, ID_PERF_SNAPSHOT_PC_LO_gfx11, isGFX11},
190+ {{" HW_REG_WAVE_EXCP_FLAG_USER" }, ID_EXCP_FLAG_USER, isGFX12Plus},
191+ {{" HW_REG_EXCP_FLAG_USER" }, ID_EXCP_FLAG_USER, isGFX12Plus},
192+ {{" HW_REG_TMA_HI" }, ID_TMA_HI, isGFX9_GFX10},
193+ {{" HW_REG_PERF_SNAPSHOT_PC_HI" }, ID_PERF_SNAPSHOT_PC_HI_gfx11, isGFX11},
194+ {{" HW_REG_WAVE_TRAP_CTRL" }, ID_TRAP_CTRL, isGFX12Plus},
195+ {{" HW_REG_TRAP_CTRL" }, ID_TRAP_CTRL, isGFX12Plus},
196+ {{" HW_REG_FLAT_SCR_LO" }, ID_FLAT_SCR_LO, isGFX10_GFX11},
197+ {{" HW_REG_WAVE_SCRATCH_BASE_LO" }, ID_FLAT_SCR_LO, isGFX12Plus},
198+ {{" HW_REG_SCRATCH_BASE_LO" }, ID_FLAT_SCR_LO, isGFX12Plus},
199+ {{" HW_REG_XCC_ID" }, ID_XCC_ID, isGFX940},
200+ {{" HW_REG_FLAT_SCR_HI" }, ID_FLAT_SCR_HI, isGFX10_GFX11},
201+ {{" HW_REG_WAVE_SCRATCH_BASE_HI" }, ID_FLAT_SCR_HI, isGFX12Plus},
202+ {{" HW_REG_SCRATCH_BASE_HI" }, ID_FLAT_SCR_HI, isGFX12Plus},
203+ {{" HW_REG_SQ_PERF_SNAPSHOT_DATA" }, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
204+ {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK, isGFX10Before1030},
205+ {{" HW_REG_SQ_PERF_SNAPSHOT_DATA1" }, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
206+ {{" HW_REG_WAVE_HW_ID1" }, ID_HW_ID1, isGFX12Plus},
207+ {{" HW_REG_HW_ID1" }, ID_HW_ID1, isGFX10Plus},
208+ {{" HW_REG_HW_ID" }, ID_HW_ID1, isGFX10},
209+ {{" HW_REG_SQ_PERF_SNAPSHOT_PC_LO" }, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
210+ {{" HW_REG_WAVE_HW_ID2" }, ID_HW_ID2, isGFX12Plus},
211+ {{" HW_REG_HW_ID2" }, ID_HW_ID2, isGFX10Plus},
212+ {{" HW_REG_SQ_PERF_SNAPSHOT_PC_HI" }, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
213+ {{" HW_REG_POPS_PACKER" }, ID_POPS_PACKER, isGFX10},
214+ {{" HW_REG_PERF_SNAPSHOT_DATA" }, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
215+ {{" HW_REG_IB_STS2" }, ID_IB_STS2, isGFX1250},
216+ {{" HW_REG_SHADER_CYCLES" }, ID_SHADER_CYCLES, isGFX10_3_GFX11},
217+ {{" HW_REG_SHADER_CYCLES_LO" }, ID_SHADER_CYCLES, isGFX12Plus},
218+ {{" HW_REG_SHADER_CYCLES_HI" }, ID_SHADER_CYCLES_HI, isGFX12Plus},
219+ {{" HW_REG_WAVE_DVGPR_ALLOC_LO" }, ID_DVGPR_ALLOC_LO, isGFX12Plus},
220+ {{" HW_REG_DVGPR_ALLOC_LO" }, ID_DVGPR_ALLOC_LO, isGFX12Plus},
221+ {{" HW_REG_WAVE_DVGPR_ALLOC_HI" }, ID_DVGPR_ALLOC_HI, isGFX12Plus},
222+ {{" HW_REG_DVGPR_ALLOC_HI" }, ID_DVGPR_ALLOC_HI, isGFX12Plus},
223+ {{" HW_REG_XNACK_STATE_PRIV" }, ID_XNACK_STATE_PRIV, isGFX1250},
224+ {{" HW_REG_XNACK_MASK" }, ID_XNACK_MASK_gfx1250, isGFX1250},
225+
234226};
235227// clang-format on
236228// NOLINTEND
0 commit comments